hyf

Context-aware query service for Radroots
git clone https://radroots.dev/git/hyf.git
Log | Files | Refs | README | LICENSE

commit 342ff98da7db53048d1bfa0eefe36a266f97e3c5
parent 7273b4d6a57a875c605fc5092e168e218f018b2a
Author: triesap <tyson@radroots.org>
Date:   Thu,  9 Apr 2026 02:34:29 +0000

core: harden request validation for deterministic ranking

- require explicit object input on the wire envelope
- reject duplicate candidate ids in semantic_rank requests
- validate candidate delivery against the supported deterministic set
- add unit and stdio regressions for the hardened request errors

Diffstat:
M src/hyf_core/capabilities/ranking_support.mojo | 25 ++++++++++++++++++++-----
M src/hyf_stdio/envelope.mojo | 13 ++++++++++++-
M tests/test_hyf.mojo | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M tests/test_stdio_contract.mojo | 26 ++++++++++++++++++++++++++
4 files changed, 137 insertions(+), 6 deletions(-)

diff --git a/src/hyf_core/capabilities/ranking_support.mojo b/src/hyf_core/capabilities/ranking_support.mojo
@@ -120,8 +120,14 @@ def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate:
         raise Error(context + " field 'farm' must not be empty")
 
     var delivery = get_string(json, "delivery")
-    if collapse_whitespace(delivery) == "":
+    var normalized_delivery = collapse_whitespace(delivery).lower()
+    if normalized_delivery == "":
         raise Error(context + " field 'delivery' must not be empty")
+    if normalized_delivery != "pickup" and normalized_delivery != "delivery":
+        raise Error(
+            context
+            + " field 'delivery' must be one of 'pickup' or 'delivery'"
+        )
 
     var distance_km = get_float(json, "distance_km")
     if distance_km < 0.0:
@@ -137,7 +143,7 @@ def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate:
         id=collapse_whitespace(id),
         title=collapse_whitespace(title),
         farm=collapse_whitespace(farm),
-        delivery=collapse_whitespace(delivery).lower(),
+        delivery=normalized_delivery,
         distance_km=distance_km,
         freshness_minutes=freshness_minutes,
     )
@@ -219,10 +225,19 @@ def parse_candidate_array(
         )
 
     var candidates = List[SemanticCandidate]()
+    var seen_ids = List[String]()
     for item in candidates_value.array_items():
-        candidates.append(
-            _parse_candidate(item, capability_name + " candidate")
-        )
+        var candidate = _parse_candidate(item, capability_name + " candidate")
+        for seen_id in seen_ids:
+            if seen_id == candidate.id:
+                raise Error(
+                    capability_name
+                    + " input contains duplicate candidate id '"
+                    + candidate.id
+                    + "'"
+                )
+        seen_ids.append(String(candidate.id))
+        candidates.append(candidate^)
 
     if len(candidates) == 0:
         raise Error(
diff --git a/src/hyf_stdio/envelope.mojo b/src/hyf_stdio/envelope.mojo
@@ -70,6 +70,16 @@ def _parse_optional_trace_id(json: Value) raises -> Optional[String]:
     return String(trace_id)
 
 
+def _require_input_value(json: Value) raises -> Value:
+    if not _has_key(json, "input"):
+        raise Error("request envelope field 'input' is required")
+
+    var input = json["input"]
+    if not input.is_object():
+        raise Error("request envelope field 'input' must be a JSON object")
+    return input.clone()
+
+
 @fieldwise_init
 struct WireRequest(Deserializable, Copyable, Movable):
     var version: Int
@@ -98,6 +108,7 @@ struct WireRequest(Deserializable, Copyable, Movable):
             context_json = json["context"].clone()
 
         var context = parse_request_context(context_json)
+        var input = _require_input_value(json)
 
         return Self(
             version=version,
@@ -105,7 +116,7 @@ struct WireRequest(Deserializable, Copyable, Movable):
             trace_id=trace_id^,
             capability=capability,
             context=context^,
-            input=json["input"].clone(),
+            input=input^,
         )
 
 
diff --git a/tests/test_hyf.mojo b/tests/test_hyf.mojo
@@ -157,6 +157,18 @@ def test_decode_request_rejects_unexpected_field() raises:
         )
 
 
+def test_decode_request_requires_input_object() raises:
+    with assert_raises():
+        _ = decode_request(
+            '{"version":1,"request_id":"req-no-input-1","capability":"query_rewrite"}'
+        )
+
+    with assert_raises():
+        _ = decode_request(
+            '{"version":1,"request_id":"req-bad-input-1","capability":"query_rewrite","input":"eggs"}'
+        )
+
+
 def test_decode_request_rejects_unsupported_context_field() raises:
     with assert_raises():
         _ = decode_request(
@@ -492,6 +504,38 @@ def test_semantic_rank_rejects_unknown_candidate_field() raises:
     )
 
 
+def test_semantic_rank_rejects_duplicate_candidate_ids() raises:
+    var result = _dispatch(
+        '{"version":1,"request_id":"rank-dup-1","capability":"semantic_rank","input":{"query":"eggs near me","candidates":[{"id":"lst_dup","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"pickup","distance_km":3.2,"freshness_minutes":2},{"id":"lst_dup","title":"Free range eggs","farm":"Santa Elena","delivery":"delivery","distance_km":8.7,"freshness_minutes":18}]}}'
+    )
+
+    assert_equal(Int(result["version"].int_value()), 1)
+    assert_equal(result["ok"].bool_value(), False)
+    assert_equal(result["request_id"].string_value(), "rank-dup-1")
+    assert_equal(result["error"]["code"].string_value(), "invalid_request")
+    assert_true(
+        result["error"]["message"].string_value().find("duplicate candidate id")
+        >= 0
+    )
+
+
+def test_semantic_rank_rejects_invalid_delivery_value() raises:
+    var result = _dispatch(
+        '{"version":1,"request_id":"rank-bad-delivery-1","capability":"semantic_rank","input":{"query":"eggs near me","candidates":[{"id":"lst_7ak2","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"ship","distance_km":3.2,"freshness_minutes":2}]}}'
+    )
+
+    assert_equal(Int(result["version"].int_value()), 1)
+    assert_equal(result["ok"].bool_value(), False)
+    assert_equal(
+        result["request_id"].string_value(), "rank-bad-delivery-1"
+    )
+    assert_equal(result["error"]["code"].string_value(), "invalid_request")
+    assert_true(
+        result["error"]["message"].string_value().find("must be one of")
+        >= 0
+    )
+
+
 def test_explain_result_returns_deterministic_summary_and_provenance() raises:
     var result = _dispatch(
         load_scenario_request_json(
@@ -554,6 +598,23 @@ def test_explain_result_rejects_unknown_candidate_field() raises:
     )
 
 
+def test_explain_result_rejects_invalid_delivery_value() raises:
+    var result = _dispatch(
+        '{"version":1,"request_id":"explain-bad-delivery-1","capability":"explain_result","input":{"query":"eggs near me","candidate":{"id":"lst_7ak2","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"ship","distance_km":3.2,"freshness_minutes":2}}}'
+    )
+
+    assert_equal(Int(result["version"].int_value()), 1)
+    assert_equal(result["ok"].bool_value(), False)
+    assert_equal(
+        result["request_id"].string_value(), "explain-bad-delivery-1"
+    )
+    assert_equal(result["error"]["code"].string_value(), "invalid_request")
+    assert_true(
+        result["error"]["message"].string_value().find("must be one of")
+        >= 0
+    )
+
+
 def test_semantic_rank_invalid_input_returns_invalid_request() raises:
     var result = _dispatch(
         '{"version":1,"request_id":"rank-bad-1","trace_id":"trace-rank-bad-1","capability":"semantic_rank","input":{"query":"eggs near me with weekend pickup","candidates":[]}}'
@@ -569,6 +630,24 @@ def test_semantic_rank_invalid_input_returns_invalid_request() raises:
     )
 
 
+def test_missing_input_returns_invalid_request() raises:
+    var result = _dispatch(
+        '{"version":1,"request_id":"missing-input-1","trace_id":"trace-missing-input-1","capability":"query_rewrite"}'
+    )
+
+    assert_equal(Int(result["version"].int_value()), 1)
+    assert_equal(result["ok"].bool_value(), False)
+    assert_equal(result["request_id"].string_value(), "missing-input-1")
+    assert_equal(
+        result["trace_id"].string_value(), "trace-missing-input-1"
+    )
+    assert_equal(result["error"]["code"].string_value(), "invalid_request")
+    assert_true(
+        result["error"]["message"].string_value().find("field 'input' is required")
+        >= 0
+    )
+
+
 def test_assisted_request_returns_backend_unavailable() raises:
     var result = _dispatch(
         load_scenario_request_json("scenarios/assisted_backend_unavailable.json")
diff --git a/tests/test_stdio_contract.mojo b/tests/test_stdio_contract.mojo
@@ -131,6 +131,32 @@ def test_strict_semantic_rank_failure() raises:
     )
 
 
+def test_duplicate_candidate_ids_fail_explicitly() raises:
+    var response = run_hyf_stdio(
+        '{"version":1,"request_id":"rank-dup-proc-1","capability":"semantic_rank","input":{"query":"eggs near me","candidates":[{"id":"lst_dup","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"pickup","distance_km":3.2,"freshness_minutes":2},{"id":"lst_dup","title":"Free range eggs","farm":"Santa Elena","delivery":"delivery","distance_km":8.7,"freshness_minutes":18}]}}'
+    )
+
+    assert_true(not response["ok"].bool_value())
+    assert_equal(response["error"]["code"].string_value(), "invalid_request")
+    assert_true(
+        response["error"]["message"].string_value().find("duplicate candidate id")
+        >= 0
+    )
+
+
+def test_missing_input_fails_explicitly() raises:
+    var response = run_hyf_stdio(
+        '{"version":1,"request_id":"missing-input-proc-1","capability":"query_rewrite"}'
+    )
+
+    assert_true(not response["ok"].bool_value())
+    assert_equal(response["error"]["code"].string_value(), "invalid_request")
+    assert_true(
+        response["error"]["message"].string_value().find("field 'input' is required")
+        >= 0
+    )
+
+
 def test_internal_error_is_bounded_on_wire() raises:
     var response = run_stdio_entrypoint(
         "tests/internal_error_stdio_main.mojo",