commit 342ff98da7db53048d1bfa0eefe36a266f97e3c5
parent 7273b4d6a57a875c605fc5092e168e218f018b2a
Author: triesap <tyson@radroots.org>
Date: Thu, 9 Apr 2026 02:34:29 +0000
core: harden request validation for deterministic ranking
- require explicit object input on the wire envelope
- reject duplicate candidate ids in semantic_rank requests
- validate candidate delivery against the supported deterministic set
- add unit and stdio regressions for the hardened request errors
Diffstat:
4 files changed, 137 insertions(+), 6 deletions(-)
diff --git a/src/hyf_core/capabilities/ranking_support.mojo b/src/hyf_core/capabilities/ranking_support.mojo
@@ -120,8 +120,14 @@ def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate:
raise Error(context + " field 'farm' must not be empty")
var delivery = get_string(json, "delivery")
- if collapse_whitespace(delivery) == "":
+ var normalized_delivery = collapse_whitespace(delivery).lower()
+ if normalized_delivery == "":
raise Error(context + " field 'delivery' must not be empty")
+ if normalized_delivery != "pickup" and normalized_delivery != "delivery":
+ raise Error(
+ context
+ + " field 'delivery' must be one of 'pickup' or 'delivery'"
+ )
var distance_km = get_float(json, "distance_km")
if distance_km < 0.0:
@@ -137,7 +143,7 @@ def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate:
id=collapse_whitespace(id),
title=collapse_whitespace(title),
farm=collapse_whitespace(farm),
- delivery=collapse_whitespace(delivery).lower(),
+ delivery=normalized_delivery,
distance_km=distance_km,
freshness_minutes=freshness_minutes,
)
@@ -219,10 +225,19 @@ def parse_candidate_array(
)
var candidates = List[SemanticCandidate]()
+ var seen_ids = List[String]()
for item in candidates_value.array_items():
- candidates.append(
- _parse_candidate(item, capability_name + " candidate")
- )
+ var candidate = _parse_candidate(item, capability_name + " candidate")
+ for seen_id in seen_ids:
+ if seen_id == candidate.id:
+ raise Error(
+ capability_name
+ + " input contains duplicate candidate id '"
+ + candidate.id
+ + "'"
+ )
+ seen_ids.append(String(candidate.id))
+ candidates.append(candidate^)
if len(candidates) == 0:
raise Error(
diff --git a/src/hyf_stdio/envelope.mojo b/src/hyf_stdio/envelope.mojo
@@ -70,6 +70,16 @@ def _parse_optional_trace_id(json: Value) raises -> Optional[String]:
return String(trace_id)
+def _require_input_value(json: Value) raises -> Value:
+    """Return a clone of the request envelope's `input` member.
+
+    Args:
+        json: The decoded request envelope.
+
+    Returns:
+        A clone of `json["input"]`.
+
+    Raises:
+        Error: If the `input` key is absent, or if its value is not a
+            JSON object.
+    """
+    # Presence is checked first so a missing key gets its own message
+    # instead of failing inside the subscript below.
+    if not _has_key(json, "input"):
+        raise Error("request envelope field 'input' is required")
+
+    if not json["input"].is_object():
+        raise Error("request envelope field 'input' must be a JSON object")
+
+    return json["input"].clone()
+
+
@fieldwise_init
struct WireRequest(Deserializable, Copyable, Movable):
var version: Int
@@ -98,6 +108,7 @@ struct WireRequest(Deserializable, Copyable, Movable):
context_json = json["context"].clone()
var context = parse_request_context(context_json)
+ var input = _require_input_value(json)
return Self(
version=version,
@@ -105,7 +116,7 @@ struct WireRequest(Deserializable, Copyable, Movable):
trace_id=trace_id^,
capability=capability,
context=context^,
- input=json["input"].clone(),
+ input=input^,
)
diff --git a/tests/test_hyf.mojo b/tests/test_hyf.mojo
@@ -157,6 +157,18 @@ def test_decode_request_rejects_unexpected_field() raises:
)
+def test_decode_request_requires_input_object() raises:
+    """`decode_request` raises when `input` is missing or is a JSON string."""
+    # Envelope that carries no `input` member at all.
+    var envelope_without_input = '{"version":1,"request_id":"req-no-input-1","capability":"query_rewrite"}'
+    with assert_raises():
+        _ = decode_request(envelope_without_input)
+
+    # Envelope whose `input` member is a string rather than an object.
+    var envelope_with_string_input = '{"version":1,"request_id":"req-bad-input-1","capability":"query_rewrite","input":"eggs"}'
+    with assert_raises():
+        _ = decode_request(envelope_with_string_input)
+
+
def test_decode_request_rejects_unsupported_context_field() raises:
with assert_raises():
_ = decode_request(
@@ -492,6 +504,38 @@ def test_semantic_rank_rejects_unknown_candidate_field() raises:
)
+def test_semantic_rank_rejects_duplicate_candidate_ids() raises:
+    """A semantic_rank request with a repeated candidate id is rejected.
+
+    Both candidates in the payload use the id `lst_dup`; the response must
+    be an `invalid_request` error whose message mentions the duplicate id.
+    """
+    var request_json = '{"version":1,"request_id":"rank-dup-1","capability":"semantic_rank","input":{"query":"eggs near me","candidates":[{"id":"lst_dup","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"pickup","distance_km":3.2,"freshness_minutes":2},{"id":"lst_dup","title":"Free range eggs","farm":"Santa Elena","delivery":"delivery","distance_km":8.7,"freshness_minutes":18}]}}'
+    var reply = _dispatch(request_json)
+
+    # Envelope fields are echoed back even on failure.
+    assert_equal(Int(reply["version"].int_value()), 1)
+    assert_equal(reply["ok"].bool_value(), False)
+    assert_equal(reply["request_id"].string_value(), "rank-dup-1")
+
+    # The error is classified as a request problem and names the cause.
+    assert_equal(reply["error"]["code"].string_value(), "invalid_request")
+    var found_at = reply["error"]["message"].string_value().find(
+        "duplicate candidate id"
+    )
+    assert_true(found_at >= 0)
+
+
+def test_semantic_rank_rejects_invalid_delivery_value() raises:
+    """A semantic_rank candidate with delivery `ship` is rejected.
+
+    The response must be an `invalid_request` error whose message contains
+    the text "must be one of".
+    """
+    var request_json = '{"version":1,"request_id":"rank-bad-delivery-1","capability":"semantic_rank","input":{"query":"eggs near me","candidates":[{"id":"lst_7ak2","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"ship","distance_km":3.2,"freshness_minutes":2}]}}'
+    var reply = _dispatch(request_json)
+
+    # Envelope fields are echoed back even on failure.
+    assert_equal(Int(reply["version"].int_value()), 1)
+    assert_equal(reply["ok"].bool_value(), False)
+    assert_equal(reply["request_id"].string_value(), "rank-bad-delivery-1")
+
+    # The error is classified as a request problem and names the cause.
+    assert_equal(reply["error"]["code"].string_value(), "invalid_request")
+    var found_at = reply["error"]["message"].string_value().find(
+        "must be one of"
+    )
+    assert_true(found_at >= 0)
+
+
def test_explain_result_returns_deterministic_summary_and_provenance() raises:
var result = _dispatch(
load_scenario_request_json(
@@ -554,6 +598,23 @@ def test_explain_result_rejects_unknown_candidate_field() raises:
)
+def test_explain_result_rejects_invalid_delivery_value() raises:
+    """An explain_result candidate with delivery `ship` is rejected.
+
+    The response must be an `invalid_request` error whose message contains
+    the text "must be one of".
+    """
+    var request_json = '{"version":1,"request_id":"explain-bad-delivery-1","capability":"explain_result","input":{"query":"eggs near me","candidate":{"id":"lst_7ak2","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"ship","distance_km":3.2,"freshness_minutes":2}}}'
+    var reply = _dispatch(request_json)
+
+    # Envelope fields are echoed back even on failure.
+    assert_equal(Int(reply["version"].int_value()), 1)
+    assert_equal(reply["ok"].bool_value(), False)
+    assert_equal(
+        reply["request_id"].string_value(), "explain-bad-delivery-1"
+    )
+
+    # The error is classified as a request problem and names the cause.
+    assert_equal(reply["error"]["code"].string_value(), "invalid_request")
+    var found_at = reply["error"]["message"].string_value().find(
+        "must be one of"
+    )
+    assert_true(found_at >= 0)
+
+
def test_semantic_rank_invalid_input_returns_invalid_request() raises:
var result = _dispatch(
'{"version":1,"request_id":"rank-bad-1","trace_id":"trace-rank-bad-1","capability":"semantic_rank","input":{"query":"eggs near me with weekend pickup","candidates":[]}}'
@@ -569,6 +630,24 @@ def test_semantic_rank_invalid_input_returns_invalid_request() raises:
)
+def test_missing_input_returns_invalid_request() raises:
+    """A request without `input` yields an `invalid_request` response.
+
+    The response must still echo `request_id` and `trace_id`, and its
+    error message must contain "field 'input' is required".
+    """
+    var request_json = '{"version":1,"request_id":"missing-input-1","trace_id":"trace-missing-input-1","capability":"query_rewrite"}'
+    var reply = _dispatch(request_json)
+
+    # Envelope fields, including the trace id, are echoed back on failure.
+    assert_equal(Int(reply["version"].int_value()), 1)
+    assert_equal(reply["ok"].bool_value(), False)
+    assert_equal(reply["request_id"].string_value(), "missing-input-1")
+    assert_equal(reply["trace_id"].string_value(), "trace-missing-input-1")
+
+    # The error is classified as a request problem and names the cause.
+    assert_equal(reply["error"]["code"].string_value(), "invalid_request")
+    var found_at = reply["error"]["message"].string_value().find(
+        "field 'input' is required"
+    )
+    assert_true(found_at >= 0)
+
+
def test_assisted_request_returns_backend_unavailable() raises:
var result = _dispatch(
load_scenario_request_json("scenarios/assisted_backend_unavailable.json")
diff --git a/tests/test_stdio_contract.mojo b/tests/test_stdio_contract.mojo
@@ -131,6 +131,32 @@ def test_strict_semantic_rank_failure() raises:
)
+def test_duplicate_candidate_ids_fail_explicitly() raises:
+    """Duplicate candidate ids sent through `run_hyf_stdio` are rejected.
+
+    Both candidates in the payload use the id `lst_dup`; the response must
+    be an `invalid_request` error whose message mentions the duplicate id.
+    """
+    var request_json = '{"version":1,"request_id":"rank-dup-proc-1","capability":"semantic_rank","input":{"query":"eggs near me","candidates":[{"id":"lst_dup","title":"Pasture eggs","farm":"La Huerta del Sur","delivery":"pickup","distance_km":3.2,"freshness_minutes":2},{"id":"lst_dup","title":"Free range eggs","farm":"Santa Elena","delivery":"delivery","distance_km":8.7,"freshness_minutes":18}]}}'
+    var reply = run_hyf_stdio(request_json)
+
+    # The call must report failure rather than a ranked result.
+    var succeeded = reply["ok"].bool_value()
+    assert_true(not succeeded)
+
+    # The error is classified as a request problem and names the cause.
+    assert_equal(reply["error"]["code"].string_value(), "invalid_request")
+    var found_at = reply["error"]["message"].string_value().find(
+        "duplicate candidate id"
+    )
+    assert_true(found_at >= 0)
+
+
+def test_missing_input_fails_explicitly() raises:
+    """A request without `input` sent through `run_hyf_stdio` is rejected.
+
+    The response must be an `invalid_request` error whose message contains
+    "field 'input' is required".
+    """
+    var request_json = '{"version":1,"request_id":"missing-input-proc-1","capability":"query_rewrite"}'
+    var reply = run_hyf_stdio(request_json)
+
+    # The call must report failure.
+    var succeeded = reply["ok"].bool_value()
+    assert_true(not succeeded)
+
+    # The error is classified as a request problem and names the cause.
+    assert_equal(reply["error"]["code"].string_value(), "invalid_request")
+    var found_at = reply["error"]["message"].string_value().find(
+        "field 'input' is required"
+    )
+    assert_true(found_at >= 0)
+
+
def test_internal_error_is_bounded_on_wire() raises:
var response = run_stdio_entrypoint(
"tests/internal_error_stdio_main.mojo",