hyf

Context-aware query service for Radroots
git clone https://radroots.dev/git/hyf.git
Log | Files | Refs | README | LICENSE

commit c18af0d6b2ba7c027fd4088a9c3ffc85cabbef22
parent 81307328f3dcf4814ef0ffe55558124ac7d330b4
Author: triesap <tyson@radroots.org>
Date:   Wed,  8 Apr 2026 18:04:38 +0000

capabilities: add deterministic semantic ranking

- add shared query-analysis and ranking helpers reused across bootstrap mode-a capabilities
- implement deterministic semantic_rank and explain_result through hyf_core and hyf_stdio
- update registry and status truthfulness now that all registered mode-a business capabilities are implemented
- validate no-input launch plus representative success, provenance, control-plane, and invalid-input requests

Diffstat:
A src/hyf_core/capabilities/explain_result.mojo | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/hyf_core/capabilities/query_analysis.mojo | 318 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/hyf_core/capabilities/query_rewrite.mojo | 320 ++++++++-----------------------------------------------------------------------
A src/hyf_core/capabilities/ranking_support.mojo | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/hyf_core/capabilities/registry.mojo | 22 ++++++++++++++++++----
A src/hyf_core/capabilities/semantic_rank.mojo | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/hyf_stdio/control/status.mojo | 20 ++++++++++++++++++--
M src/hyf_stdio/server.mojo | 30 ++++++++++++++++++++++++++++++
8 files changed, 983 insertions(+), 296 deletions(-)

diff --git a/src/hyf_core/capabilities/explain_result.mojo b/src/hyf_core/capabilities/explain_result.mojo @@ -0,0 +1,130 @@ +from std.collections import List + +from mojson import Value, loads + +from hyf_core.capabilities.query_analysis import ( + analyze_query, + build_deterministic_meta, + query_signal_tags, + serialize_extracted_filters, + string_array_value, +) +from hyf_core.capabilities.ranking_support import ( + evaluate_candidate, + parse_single_candidate, +) +from hyf_core.errors import ( + CapabilityResult, + failed_capability, + invalid_input_error, + successful_capability, +) +from hyf_core.provenance import ProvenanceSourceRef +from hyf_core.request_context import RequestContext + + +def _join_reason_summary(reasons: List[String]) -> String: + if len(reasons) == 0: + return "no strong ranking signals were detected" + + if len(reasons) == 1: + return String(reasons[0]) + + var summary = String() + for index in range(len(reasons)): + if index > 0: + if index == len(reasons) - 1: + summary += " and " + else: + summary += ", " + summary += String(reasons[index]) + return summary^ + + +def _build_output( + result_id: String, + score: Int, + reasons: List[String], + matched_terms: List[String], + ranking_hints: List[String], + extracted_filters: Value, + delivery_alignment: String, + distance_band: String, + freshness_band: String, + scope_match: Bool, +) raises -> Value: + var output = loads("{}") + output.set("result_id", Value(String(result_id))) + output.set( + "explanation_kind", + Value("deterministic"), + ) + output.set( + "summary", + Value( + "Result " + + result_id + + " was ranked using deterministic heuristic signals: " + + _join_reason_summary(reasons) + + "." 
+ ), + ) + output.set("score", Value(score)) + output.set("reasons", string_array_value(reasons)) + output.set("matched_terms", string_array_value(matched_terms)) + output.set("ranking_hints", string_array_value(ranking_hints)) + output.set("extracted_filters", extracted_filters) + + var assessment = loads("{}") + assessment.set("delivery_alignment", Value(String(delivery_alignment))) + assessment.set("distance_band", Value(String(distance_band))) + assessment.set("freshness_band", Value(String(freshness_band))) + assessment.set("scope_match", Value(scope_match)) + output.set("signal_assessment", assessment) + return output^ + + +def execute_explain_result( + input: Value, context: RequestContext +) raises -> CapabilityResult: + try: + var analysis = analyze_query(input, context, "explain_result") + var candidate = parse_single_candidate(input, "explain_result") + var evaluation = evaluate_candidate(candidate, analysis, context) + + var signal_tags = query_signal_tags(analysis) + for reason in evaluation.reasons: + signal_tags.append("reason:" + String(reason)) + + var source_refs = List[ProvenanceSourceRef]() + source_refs.append( + ProvenanceSourceRef( + source_kind="candidate", + source_ref="explain_result:candidate", + ) + ) + + return successful_capability( + _build_output( + result_id=evaluation.candidate.id, + score=evaluation.score, + reasons=evaluation.reasons, + matched_terms=evaluation.matched_terms, + ranking_hints=analysis.ranking_hints, + extracted_filters=serialize_extracted_filters( + analysis.extracted_filters + ), + delivery_alignment=evaluation.delivery_alignment, + distance_band=evaluation.distance_band, + freshness_band=evaluation.freshness_band, + scope_match=evaluation.scope_match, + ), + meta=build_deterministic_meta( + context=context, + capability_name="explain_result", + signal_tags=signal_tags, + extra_source_refs=source_refs^, + ), + ) + except e: + return failed_capability(invalid_input_error(String(e))) diff --git 
a/src/hyf_core/capabilities/query_analysis.mojo b/src/hyf_core/capabilities/query_analysis.mojo @@ -0,0 +1,318 @@ +from std.collections import List + +from mojson import Value, loads + +from hyf_core.provenance import ( + CoreResponseMeta, + ExecutionProvenance, + ProvenanceSourceRef, +) +from hyf_core.request_context import RequestContext + + +def has_key(value: Value, key: String) -> Bool: + for candidate in value.object_keys(): + if candidate == key: + return True + return False + + +def copy_string_list(items: List[String]) -> List[String]: + var copied = List[String]() + for item in items: + copied.append(String(item)) + return copied^ + + +def string_array_value(items: List[String]) raises -> Value: + var array = loads("[]") + for item in items: + array.append(Value(String(item))) + return array^ + + +def collapse_whitespace(text: String) -> String: + var parts = text.split() + var collapsed = String() + var first = True + for part in parts: + if not first: + collapsed += " " + collapsed += String(part) + first = False + return collapsed^ + + +def join_strings(items: List[String]) -> String: + var joined = String() + var first = True + for item in items: + if not first: + joined += " " + joined += String(item) + first = False + return joined^ + + +def normalize_free_text(text: String, mut signals: List[String]) -> String: + var normalized = text.lower() + if normalized != text: + signals.append("lowercase") + + var replaced = normalized + replaced = replaced.replace(",", " ") + replaced = replaced.replace(".", " ") + replaced = replaced.replace("!", " ") + replaced = replaced.replace("?", " ") + replaced = replaced.replace(":", " ") + replaced = replaced.replace(";", " ") + replaced = replaced.replace("/", " ") + replaced = replaced.replace("\\", " ") + replaced = replaced.replace("(", " ") + replaced = replaced.replace(")", " ") + replaced = replaced.replace("[", " ") + replaced = replaced.replace("]", " ") + replaced = replaced.replace("{", " ") + replaced 
= replaced.replace("}", " ") + replaced = replaced.replace("\"", " ") + replaced = replaced.replace("'", " ") + replaced = replaced.replace("-", " ") + if replaced != normalized: + signals.append("punctuation_trimmed") + + var collapsed = collapse_whitespace(replaced) + if collapsed != replaced: + signals.append("whitespace_collapsed") + + return collapsed^ + + +def contains_token(items: List[String], token: String) -> Bool: + for item in items: + if item == token: + return True + return False + + +def _is_stop_word(token: String) -> Bool: + return ( + token == "a" + or token == "an" + or token == "and" + or token == "for" + or token == "from" + or token == "in" + or token == "me" + or token == "near" + or token == "of" + or token == "on" + or token == "the" + or token == "to" + or token == "with" + ) + + +@fieldwise_init +struct ExtractedFilters(Copyable, Movable): + var local_intent: Bool + var fulfillment: String + var time_window: String + + +@fieldwise_init +struct QueryAnalysis(Copyable, Movable): + var original_text: String + var normalized_text: String + var rewritten_text: String + var query_terms: List[String] + var normalization_signals: List[String] + var ranking_hints: List[String] + var extracted_filters: ExtractedFilters + + +def extract_text_input(input: Value, capability_name: String) raises -> String: + if not input.is_object(): + raise Error(capability_name + " input must be a JSON object") + + if has_key(input, "text"): + var text_value = input["text"] + if not text_value.is_string(): + raise Error( + capability_name + " input field 'text' must be a string" + ) + var collapsed = collapse_whitespace(text_value.string_value()) + if collapsed == "": + raise Error(capability_name + " input text must not be empty") + return collapsed^ + elif has_key(input, "query"): + var query_value = input["query"] + if not query_value.is_string(): + raise Error( + capability_name + " input field 'query' must be a string" + ) + var collapsed = 
collapse_whitespace(query_value.string_value()) + if collapsed == "": + raise Error(capability_name + " input text must not be empty") + return collapsed^ + else: + raise Error( + capability_name + " input requires 'text' or 'query'" + ) + + +def analyze_query( + input: Value, context: RequestContext, capability_name: String +) raises -> QueryAnalysis: + var original_text = extract_text_input(input, capability_name) + + var normalization_signals = List[String]() + var normalized_text = normalize_free_text(original_text, normalization_signals) + var normalized_tokens = normalized_text.split() + + var query_terms = List[String]() + var ranking_hints = List[String]() + var local_intent = False + var fulfillment = "unspecified" + var time_window = "unspecified" + var removed_stop_words = False + var extracted_filter_tokens = False + + for raw_token in normalized_tokens: + var token = String(raw_token) + if token == "": + continue + + if ( + token == "near" + or token == "me" + or token == "nearby" + or token == "local" + ): + local_intent = True + extracted_filter_tokens = True + continue + + if token == "pickup" or token == "curbside": + fulfillment = "pickup" + extracted_filter_tokens = True + continue + + if token == "delivery" or token == "ship" or token == "shipping": + fulfillment = "delivery" + extracted_filter_tokens = True + continue + + if token == "weekend" or token == "saturday" or token == "sunday": + time_window = "weekend" + extracted_filter_tokens = True + continue + + if _is_stop_word(token): + removed_stop_words = True + continue + + if not contains_token(query_terms, token): + query_terms.append(token) + + if local_intent: + normalization_signals.append("local_intent_detected") + ranking_hints.append("prefer_local_results") + if fulfillment == "pickup": + normalization_signals.append("pickup_filter_detected") + ranking_hints.append("prefer_pickup") + elif fulfillment == "delivery": + normalization_signals.append("delivery_filter_detected") + 
ranking_hints.append("prefer_delivery") + if time_window == "weekend": + normalization_signals.append("weekend_filter_detected") + ranking_hints.append("prefer_weekend_availability") + if removed_stop_words: + normalization_signals.append("stopwords_removed") + if extracted_filter_tokens: + normalization_signals.append("filter_tokens_extracted") + if context.scope: + ranking_hints.append("respect_scope") + normalization_signals.append("scope_present") + + if len(query_terms) == 0: + query_terms.append(String(normalized_text)) + normalization_signals.append("fallback_to_normalized_query") + + return QueryAnalysis( + original_text=original_text, + normalized_text=normalized_text, + rewritten_text=join_strings(query_terms), + query_terms=query_terms^, + normalization_signals=normalization_signals^, + ranking_hints=ranking_hints^, + extracted_filters=ExtractedFilters( + local_intent=local_intent, + fulfillment=fulfillment, + time_window=time_window, + ), + ) + + +def serialize_extracted_filters(filters: ExtractedFilters) raises -> Value: + var value = loads("{}") + value.set("local_intent", Value(filters.local_intent)) + value.set("fulfillment", Value(String(filters.fulfillment))) + value.set("time_window", Value(String(filters.time_window))) + return value^ + + +def query_signal_tags(analysis: QueryAnalysis) -> List[String]: + var signal_tags = copy_string_list(analysis.normalization_signals) + for hint in analysis.ranking_hints: + signal_tags.append(String(hint)) + return signal_tags^ + + +def build_deterministic_meta( + context: RequestContext, + capability_name: String, + signal_tags: List[String], + extra_source_refs: List[ProvenanceSourceRef], +) -> CoreResponseMeta: + var source_refs = List[ProvenanceSourceRef]() + source_refs.append( + ProvenanceSourceRef( + source_kind="local_input", + source_ref=capability_name + ":input", + ) + ) + for source_ref in extra_source_refs: + source_refs.append( + ProvenanceSourceRef( + source_kind=String(source_ref.source_kind), 
+ source_ref=String(source_ref.source_ref), + ) + ) + if context.scope: + source_refs.append( + ProvenanceSourceRef( + source_kind="request_scope", + source_ref="request_context.scope", + ) + ) + + if context.return_provenance: + return CoreResponseMeta( + mode="a", + backend="heuristic", + latency_ms=0, + provenance=ExecutionProvenance( + kind="deterministic", + signal_tags=copy_string_list(signal_tags), + source_refs=source_refs^, + fallback=None, + evidence_set_id=None, + ), + ) + + return CoreResponseMeta( + mode="a", + backend="heuristic", + latency_ms=0, + provenance=None, + ) diff --git a/src/hyf_core/capabilities/query_rewrite.mojo b/src/hyf_core/capabilities/query_rewrite.mojo @@ -1,317 +1,57 @@ -from std.collections import List, Optional +from std.collections import List from mojson import Value, loads +from hyf_core.capabilities.query_analysis import ( + QueryAnalysis, + analyze_query, + build_deterministic_meta, + query_signal_tags, + serialize_extracted_filters, + string_array_value, +) from hyf_core.errors import ( CapabilityResult, failed_capability, invalid_input_error, successful_capability, ) -from hyf_core.provenance import ( - CoreResponseMeta, - ExecutionProvenance, - ProvenanceSourceRef, -) +from hyf_core.provenance import ProvenanceSourceRef from hyf_core.request_context import RequestContext -def _has_key(value: Value, key: String) -> Bool: - for candidate in value.object_keys(): - if candidate == key: - return True - return False - - -def _copy_strings(items: List[String]) -> List[String]: - var copied = List[String]() - for item in items: - copied.append(String(item)) - return copied^ - - -def _string_array(items: List[String]) raises -> Value: - var array = loads("[]") - for item in items: - array.append(Value(String(item))) - return array^ - - -def _collapse_whitespace(text: String) -> String: - var parts = text.split() - var collapsed = String() - var first = True - for part in parts: - if not first: - collapsed += " " - collapsed += 
String(part) - first = False - return collapsed^ - - -def _join_strings(items: List[String]) -> String: - var joined = String() - var first = True - for item in items: - if not first: - joined += " " - joined += String(item) - first = False - return joined^ - - -def _normalize_text(text: String, mut signals: List[String]) -> String: - var normalized = text.lower() - if normalized != text: - signals.append("lowercase") - - var replaced = normalized - replaced = replaced.replace(",", " ") - replaced = replaced.replace(".", " ") - replaced = replaced.replace("!", " ") - replaced = replaced.replace("?", " ") - replaced = replaced.replace(":", " ") - replaced = replaced.replace(";", " ") - replaced = replaced.replace("/", " ") - replaced = replaced.replace("\\", " ") - replaced = replaced.replace("(", " ") - replaced = replaced.replace(")", " ") - replaced = replaced.replace("[", " ") - replaced = replaced.replace("]", " ") - replaced = replaced.replace("{", " ") - replaced = replaced.replace("}", " ") - replaced = replaced.replace("\"", " ") - replaced = replaced.replace("'", " ") - replaced = replaced.replace("-", " ") - if replaced != normalized: - signals.append("punctuation_trimmed") - - var collapsed = _collapse_whitespace(replaced) - if collapsed != replaced: - signals.append("whitespace_collapsed") - - return collapsed^ - - -def _contains_token(items: List[String], token: String) -> Bool: - for item in items: - if item == token: - return True - return False - - -def _is_stop_word(token: String) -> Bool: - return ( - token == "a" - or token == "an" - or token == "and" - or token == "for" - or token == "from" - or token == "in" - or token == "me" - or token == "near" - or token == "of" - or token == "on" - or token == "the" - or token == "to" - or token == "with" - ) - - -def _extract_text_input(input: Value) raises -> String: - if not input.is_object(): - raise Error("query_rewrite input must be a JSON object") - - if _has_key(input, "text"): - var text_value = 
input["text"] - if not text_value.is_string(): - raise Error("query_rewrite input field 'text' must be a string") - var collapsed = _collapse_whitespace(text_value.string_value()) - if collapsed == "": - raise Error("query_rewrite input text must not be empty") - return collapsed^ - elif _has_key(input, "query"): - var query_value = input["query"] - if not query_value.is_string(): - raise Error("query_rewrite input field 'query' must be a string") - var collapsed = _collapse_whitespace(query_value.string_value()) - if collapsed == "": - raise Error("query_rewrite input text must not be empty") - return collapsed^ - else: - raise Error("query_rewrite input requires 'text' or 'query'") - - -def _build_output( - original_text: String, - normalized_text: String, - rewritten_text: String, - query_terms: List[String], - normalization_signals: List[String], - ranking_hints: List[String], - local_intent: Bool, - fulfillment: String, - time_window: String, -) raises -> Value: +def _build_output(analysis: QueryAnalysis) raises -> Value: var output = loads("{}") - output.set("original_text", Value(String(original_text))) - output.set("normalized_text", Value(String(normalized_text))) - output.set("rewritten_text", Value(String(rewritten_text))) - output.set("query_terms", _string_array(query_terms)) - output.set("normalization_signals", _string_array(normalization_signals)) - output.set("ranking_hints", _string_array(ranking_hints)) - - var filters = loads("{}") - filters.set("local_intent", Value(local_intent)) - filters.set("fulfillment", Value(String(fulfillment))) - filters.set("time_window", Value(String(time_window))) - output.set("extracted_filters", filters) - return output^ - - -def _build_meta( - context: RequestContext, - normalization_signals: List[String], - ranking_hints: List[String], -) -> Optional[CoreResponseMeta]: - var source_refs = List[ProvenanceSourceRef]() - source_refs.append( - ProvenanceSourceRef( - source_kind="local_input", - 
source_ref="query_rewrite:input", - ) + output.set("original_text", Value(String(analysis.original_text))) + output.set("normalized_text", Value(String(analysis.normalized_text))) + output.set("rewritten_text", Value(String(analysis.rewritten_text))) + output.set("query_terms", string_array_value(analysis.query_terms)) + output.set( + "normalization_signals", + string_array_value(analysis.normalization_signals), ) - if context.scope: - source_refs.append( - ProvenanceSourceRef( - source_kind="request_scope", - source_ref="request_context.scope", - ) - ) - - var signal_tags = _copy_strings(normalization_signals) - for hint in ranking_hints: - signal_tags.append(String(hint)) - - if context.return_provenance: - return CoreResponseMeta( - mode="a", - backend="heuristic", - latency_ms=0, - provenance=ExecutionProvenance( - kind="deterministic", - signal_tags=signal_tags^, - source_refs=source_refs^, - fallback=None, - evidence_set_id=None, - ), - ) - - return CoreResponseMeta( - mode="a", - backend="heuristic", - latency_ms=0, - provenance=None, + output.set("ranking_hints", string_array_value(analysis.ranking_hints)) + output.set( + "extracted_filters", + serialize_extracted_filters(analysis.extracted_filters), ) + return output^ def execute_query_rewrite( input: Value, context: RequestContext ) raises -> CapabilityResult: try: - var original_text = _extract_text_input(input) - - var normalization_signals = List[String]() - var normalized_text = _normalize_text(original_text, normalization_signals) - var normalized_tokens = normalized_text.split() - - var query_terms = List[String]() - var ranking_hints = List[String]() - var local_intent = False - var fulfillment = "unspecified" - var time_window = "unspecified" - var removed_stop_words = False - var extracted_filter_tokens = False - - for raw_token in normalized_tokens: - var token = String(raw_token) - if token == "": - continue - - if ( - token == "near" - or token == "me" - or token == "nearby" - or token == 
"local" - ): - local_intent = True - extracted_filter_tokens = True - continue - - if token == "pickup" or token == "curbside": - fulfillment = "pickup" - extracted_filter_tokens = True - continue - - if token == "delivery" or token == "ship" or token == "shipping": - fulfillment = "delivery" - extracted_filter_tokens = True - continue - - if token == "weekend" or token == "saturday" or token == "sunday": - time_window = "weekend" - extracted_filter_tokens = True - continue - - if _is_stop_word(token): - removed_stop_words = True - continue - - if not _contains_token(query_terms, token): - query_terms.append(token) - - if local_intent: - normalization_signals.append("local_intent_detected") - ranking_hints.append("prefer_local_results") - if fulfillment == "pickup": - normalization_signals.append("pickup_filter_detected") - ranking_hints.append("prefer_pickup") - elif fulfillment == "delivery": - normalization_signals.append("delivery_filter_detected") - ranking_hints.append("prefer_delivery") - if time_window == "weekend": - normalization_signals.append("weekend_filter_detected") - ranking_hints.append("prefer_weekend_availability") - if removed_stop_words: - normalization_signals.append("stopwords_removed") - if extracted_filter_tokens: - normalization_signals.append("filter_tokens_extracted") - if context.scope: - ranking_hints.append("respect_scope") - normalization_signals.append("scope_present") - - if len(query_terms) == 0: - query_terms.append(String(normalized_text)) - normalization_signals.append("fallback_to_normalized_query") - - var rewritten_text = _join_strings(query_terms) + var analysis = analyze_query(input, context, "query_rewrite") + var source_refs = List[ProvenanceSourceRef]() return successful_capability( - _build_output( - original_text=original_text, - normalized_text=normalized_text, - rewritten_text=rewritten_text, - query_terms=query_terms, - normalization_signals=normalization_signals, - ranking_hints=ranking_hints, - 
local_intent=local_intent, - fulfillment=fulfillment, - time_window=time_window, - ), - meta=_build_meta( + _build_output(analysis), + meta=build_deterministic_meta( context=context, - normalization_signals=normalization_signals, - ranking_hints=ranking_hints, + capability_name="query_rewrite", + signal_tags=query_signal_tags(analysis), + extra_source_refs=source_refs^, ), ) except e: diff --git a/src/hyf_core/capabilities/ranking_support.mojo b/src/hyf_core/capabilities/ranking_support.mojo @@ -0,0 +1,332 @@ +from std.collections import List + +from mojson import Value +from mojson.deserialize import get_float, get_int, get_string + +from hyf_core.capabilities.query_analysis import ( + QueryAnalysis, + collapse_whitespace, + has_key, + normalize_free_text, +) +from hyf_core.request_context import RequestContext + + +@fieldwise_init +struct SemanticCandidate(Copyable, Movable): + var id: String + var title: String + var farm: String + var delivery: String + var distance_km: Float64 + var freshness_minutes: Int + + +@fieldwise_init +struct CandidateEvaluation(Copyable, Movable): + var candidate: SemanticCandidate + var score: Int + var reasons: List[String] + var matched_terms: List[String] + var delivery_alignment: String + var distance_band: String + var freshness_band: String + var scope_match: Bool + + +def _require_object(value: Value, context: String) raises: + if not value.is_object(): + raise Error(context + " must be a JSON object") + + +def _copy_candidate(candidate: SemanticCandidate) -> SemanticCandidate: + return SemanticCandidate( + id=String(candidate.id), + title=String(candidate.title), + farm=String(candidate.farm), + delivery=String(candidate.delivery), + distance_km=candidate.distance_km, + freshness_minutes=candidate.freshness_minutes, + ) + + +def _copy_string_list(items: List[String]) -> List[String]: + var copied = List[String]() + for item in items: + copied.append(String(item)) + return copied^ + + +def _copy_evaluation(evaluation: 
CandidateEvaluation) -> CandidateEvaluation: + return CandidateEvaluation( + candidate=_copy_candidate(evaluation.candidate), + score=evaluation.score, + reasons=_copy_string_list(evaluation.reasons), + matched_terms=_copy_string_list(evaluation.matched_terms), + delivery_alignment=String(evaluation.delivery_alignment), + distance_band=String(evaluation.distance_band), + freshness_band=String(evaluation.freshness_band), + scope_match=evaluation.scope_match, + ) + + +def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate: + _require_object(json, context) + + var id = get_string(json, "id") + if collapse_whitespace(id) == "": + raise Error(context + " field 'id' must not be empty") + + var title = get_string(json, "title") + if collapse_whitespace(title) == "": + raise Error(context + " field 'title' must not be empty") + + var farm = get_string(json, "farm") + if collapse_whitespace(farm) == "": + raise Error(context + " field 'farm' must not be empty") + + var delivery = get_string(json, "delivery") + if collapse_whitespace(delivery) == "": + raise Error(context + " field 'delivery' must not be empty") + + var distance_km = get_float(json, "distance_km") + if distance_km < 0.0: + raise Error(context + " field 'distance_km' must be non-negative") + + var freshness_minutes = get_int(json, "freshness_minutes") + if freshness_minutes < 0: + raise Error( + context + " field 'freshness_minutes' must be non-negative" + ) + + return SemanticCandidate( + id=collapse_whitespace(id), + title=collapse_whitespace(title), + farm=collapse_whitespace(farm), + delivery=collapse_whitespace(delivery).lower(), + distance_km=distance_km, + freshness_minutes=freshness_minutes, + ) + + +def parse_candidate_array( + input: Value, capability_name: String +) raises -> List[SemanticCandidate]: + _require_object(input, capability_name + " input") + + if not has_key(input, "candidates"): + raise Error(capability_name + " input requires 'candidates'") + + var 
candidates_value = input["candidates"] + if not candidates_value.is_array(): + raise Error( + capability_name + " input field 'candidates' must be a JSON array" + ) + + var candidates = List[SemanticCandidate]() + for item in candidates_value.array_items(): + candidates.append( + _parse_candidate(item, capability_name + " candidate") + ) + + if len(candidates) == 0: + raise Error( + capability_name + " input field 'candidates' must not be empty" + ) + + return candidates^ + + +def parse_single_candidate( + input: Value, capability_name: String +) raises -> SemanticCandidate: + _require_object(input, capability_name + " input") + + var field_count = 0 + if has_key(input, "candidate"): + field_count += 1 + if has_key(input, "result"): + field_count += 1 + + if field_count == 0: + raise Error( + capability_name + " input requires 'candidate' or 'result'" + ) + if field_count > 1: + raise Error( + capability_name + + " input must not include both 'candidate' and 'result'" + ) + + if has_key(input, "candidate"): + return _parse_candidate( + input["candidate"], capability_name + " candidate" + ) + + return _parse_candidate(input["result"], capability_name + " result") + + +def _normalize_candidate_text(candidate: SemanticCandidate) -> String: + var signals = List[String]() + return normalize_free_text( + candidate.title + " " + candidate.farm, signals + ) + + +def _display_term(term: String) -> String: + return String(term) + + +def _delivery_alignment( + query_delivery: String, candidate_delivery: String +) -> String: + if query_delivery == "unspecified": + return "not_requested" + if query_delivery == candidate_delivery: + return "match" + return "mismatch" + + +def _distance_band(local_intent: Bool, distance_km: Float64) -> String: + if not local_intent: + return "not_considered" + if distance_km <= 5.0: + return "closer" + return "farther" + + +def _freshness_band(freshness_minutes: Int) -> String: + if freshness_minutes <= 10: + return "fresher" + if 
freshness_minutes <= 30: + return "standard" + return "older" + + +def _scope_match(candidate: SemanticCandidate, context: RequestContext) -> Bool: + if not context.scope: + return False + + var scope = context.scope.value().copy() + for listing_id in scope.listing_ids: + if listing_id == candidate.id: + return True + return False + + +def evaluate_candidate( + candidate: SemanticCandidate, + analysis: QueryAnalysis, + context: RequestContext, +) -> CandidateEvaluation: + var reasons = List[String]() + var matched_terms = List[String]() + var score = 0 + + var normalized_candidate_text = _normalize_candidate_text(candidate) + var scope_match = _scope_match(candidate, context) + if scope_match: + reasons.append("scope match") + score += 40 + + for query_term in analysis.query_terms: + var matched = False + for candidate_term in normalized_candidate_text.split(): + if String(candidate_term) == query_term: + matched = True + break + if matched: + matched_terms.append(String(query_term)) + score += 30 + + if len(matched_terms) > 0: + reasons.append(_display_term(matched_terms[0]) + " match") + + var delivery_alignment = _delivery_alignment( + analysis.extracted_filters.fulfillment, candidate.delivery + ) + if delivery_alignment == "match": + reasons.append(candidate.delivery + " match") + score += 35 + elif delivery_alignment == "mismatch": + reasons.append("delivery mismatch") + score -= 20 + + var distance_band = _distance_band( + analysis.extracted_filters.local_intent, candidate.distance_km + ) + if distance_band == "closer": + reasons.append("closer") + score += 20 + elif distance_band == "farther": + reasons.append("farther") + score += 5 + + var freshness_band = _freshness_band(candidate.freshness_minutes) + if freshness_band == "fresher": + reasons.append("fresher") + score += 15 + elif freshness_band == "older": + score -= 5 + + if analysis.extracted_filters.time_window == "weekend": + score += 2 + + return CandidateEvaluation( + 
candidate=_copy_candidate(candidate), + score=score, + reasons=reasons^, + matched_terms=matched_terms^, + delivery_alignment=delivery_alignment, + distance_band=distance_band, + freshness_band=freshness_band, + scope_match=scope_match, + ) + + +def _should_precede( + pending: CandidateEvaluation, existing: CandidateEvaluation +) -> Bool: + if pending.score != existing.score: + return pending.score > existing.score + + if pending.scope_match != existing.scope_match: + return pending.scope_match + + if len(pending.matched_terms) != len(existing.matched_terms): + return len(pending.matched_terms) > len(existing.matched_terms) + + if pending.candidate.distance_km != existing.candidate.distance_km: + return pending.candidate.distance_km < existing.candidate.distance_km + + if ( + pending.candidate.freshness_minutes + != existing.candidate.freshness_minutes + ): + return ( + pending.candidate.freshness_minutes + < existing.candidate.freshness_minutes + ) + + return pending.candidate.id < existing.candidate.id + + +def rank_candidates( + candidates: List[SemanticCandidate], + analysis: QueryAnalysis, + context: RequestContext, +) -> List[CandidateEvaluation]: + var ranked = List[CandidateEvaluation]() + for candidate in candidates: + var pending = evaluate_candidate(candidate, analysis, context) + var updated = List[CandidateEvaluation]() + var inserted = False + for existing in ranked: + if not inserted and _should_precede(pending, existing): + updated.append(_copy_evaluation(pending)) + inserted = True + updated.append(_copy_evaluation(existing)) + if not inserted: + updated.append(_copy_evaluation(pending)) + ranked = updated^ + return ranked^ diff --git a/src/hyf_core/capabilities/registry.mojo b/src/hyf_core/capabilities/registry.mojo @@ -37,8 +37,8 @@ def canonical_business_capabilities() -> List[BusinessCapabilityDescriptor]: BusinessCapabilityDescriptor( id="semantic_rank", mode_a_enabled=True, - implemented=False, - callable=False, + implemented=True, + 
callable=True,
             mode_b_available=False,
             disabled_reason="",
         )
@@ -77,8 +77,8 @@ def canonical_business_capabilities() -> List[BusinessCapabilityDescriptor]:
         BusinessCapabilityDescriptor(
             id="explain_result",
             mode_a_enabled=True,
-            implemented=False,
-            callable=False,
+            implemented=True,
+            callable=True,
             mode_b_available=False,
             disabled_reason="",
         )
@@ -100,6 +100,14 @@ def bootstrap_capability_count() -> Int:
     return len(canonical_business_capabilities())


+def implemented_enabled_capability_count() -> Int:  # descriptors with both mode_a_enabled and implemented set
+    var implemented = 0
+    for capability in canonical_business_capabilities():
+        if capability.mode_a_enabled and capability.implemented:
+            implemented += 1
+    return implemented
+
+
 def bootstrap_enabled_capabilities() -> List[String]:
     var enabled = List[String]()
     for capability in canonical_business_capabilities():
@@ -108,6 +116,12 @@ def bootstrap_enabled_capabilities() -> List[String]:
     return enabled^


+def all_enabled_capabilities_implemented() -> Bool:  # implemented count equals the bootstrap-enabled count
+    return implemented_enabled_capability_count() == len(
+        bootstrap_enabled_capabilities()
+    )
+
+
 def deferred_capabilities() -> List[String]:
     var disabled = List[String]()
     for capability in canonical_business_capabilities():
diff --git a/src/hyf_core/capabilities/semantic_rank.mojo b/src/hyf_core/capabilities/semantic_rank.mojo
@@ -0,0 +1,107 @@
+from std.collections import List
+
+from mojson import Value, loads
+
+from hyf_core.capabilities.query_analysis import (
+    analyze_query,
+    build_deterministic_meta,
+    query_signal_tags,
+    serialize_extracted_filters,
+    string_array_value,
+)
+from hyf_core.capabilities.ranking_support import (
+    CandidateEvaluation,
+    parse_candidate_array,
+    rank_candidates,
+)
+from hyf_core.errors import (
+    CapabilityResult,
+    failed_capability,
+    invalid_input_error,
+    successful_capability,
+)
+from hyf_core.provenance import ProvenanceSourceRef
+from hyf_core.request_context import RequestContext
+
+# Serializes each evaluation, in the order given, into a JSON object appended to a JSON array.
+def _build_scored_candidates(
+    ranked: List[CandidateEvaluation]
+) raises -> Value:
+    var scored = loads("[]")
+    for evaluation in ranked:
+        var candidate = loads("{}")
+        candidate.set("id", Value(String(evaluation.candidate.id)))
+        candidate.set("score", Value(evaluation.score))
+        candidate.set("matched_terms", string_array_value(evaluation.matched_terms))
+        candidate.set("reasons", string_array_value(evaluation.reasons))
+        candidate.set(
+            "delivery_alignment",
+            Value(String(evaluation.delivery_alignment)),
+        )
+        candidate.set("distance_band", Value(String(evaluation.distance_band)))
+        candidate.set("freshness_band", Value(String(evaluation.freshness_band)))
+        candidate.set("scope_match", Value(evaluation.scope_match))
+        scored.append(candidate)
+    return scored^
+
+# Assembles the output object: ranked_ids, reasons, scored_candidates, ranking_hints and extracted_filters.
+def _build_output(
+    ranked: List[CandidateEvaluation],
+    ranking_hints: List[String],
+    extracted_filters: Value,
+) raises -> Value:
+    var output = loads("{}")
+    var ranked_ids = loads("[]")
+    var reasons = loads("{}")
+    # ranked_ids follows the order of ranked; reasons is keyed by candidate id.
+    for evaluation in ranked:
+        ranked_ids.append(Value(String(evaluation.candidate.id)))
+        reasons.set(
+            String(evaluation.candidate.id),
+            string_array_value(evaluation.reasons),
+        )
+
+    output.set("ranked_ids", ranked_ids)
+    output.set("reasons", reasons)
+    output.set("scored_candidates", _build_scored_candidates(ranked))
+    output.set("ranking_hints", string_array_value(ranking_hints))
+    output.set("extracted_filters", extracted_filters)
+    return output^
+
+# Runs semantic_rank: analyze the query, parse and rank the candidates, then wrap the output with metadata.
+def execute_semantic_rank(
+    input: Value, context: RequestContext
+) raises -> CapabilityResult:
+    try:
+        var analysis = analyze_query(input, context, "semantic_rank")
+        var candidates = parse_candidate_array(input, "semantic_rank")
+        var ranked = rank_candidates(candidates, analysis, context)
+        # Extend the query's signal tags with a marker for the evaluated candidate set.
+        var signal_tags = query_signal_tags(analysis)
+        signal_tags.append("candidate_set_evaluated")
+        # One extra source reference naming the candidate set.
+        var source_refs = List[ProvenanceSourceRef]()
+        source_refs.append(
+            ProvenanceSourceRef(
+                source_kind="candidate_set",
+                source_ref="semantic_rank:candidates",
+            )
+        )
+
+        return successful_capability(
+            _build_output(
+                ranked=ranked,
+                ranking_hints=analysis.ranking_hints,
+                extracted_filters=serialize_extracted_filters(
+                    analysis.extracted_filters
+                ),
+            ),
+            meta=build_deterministic_meta(
+                context=context,
+                capability_name="semantic_rank",
+                signal_tags=signal_tags,
+                extra_source_refs=source_refs^,
+            ),
+        )
+    except e:  # NOTE(review): every raised error is reported as invalid_input_error - confirm that is intended
+        return failed_capability(invalid_input_error(String(e)))
diff --git a/src/hyf_stdio/control/status.mojo b/src/hyf_stdio/control/status.mojo
@@ -3,9 +3,11 @@ from std.collections import List
 from mojson import Value, loads

 from hyf_core.capabilities.registry import (
+    all_enabled_capabilities_implemented,
     bootstrap_capability_count,
     bootstrap_enabled_capabilities,
     deferred_capabilities,
+    implemented_enabled_capability_count,
 )
 from hyf_core.request_context import request_context_feature_names

@@ -22,7 +24,12 @@ def build_status_output() raises -> Value:
     output.set("daemon", Value("hyfd"))
     output.set("transport", Value("stdio"))
     output.set("request_framing", Value("newline_delimited_json"))
-    output.set("implementation_status", Value("bootstrap_partial_mode_a"))
+    output.set(
+        "implementation_status",
+        Value("bootstrap_registered_mode_a_ready")
+        if all_enabled_capabilities_implemented()
+        else Value("bootstrap_partial_mode_a"),
+    )

     var modes = loads("{}")
     modes.set("a", Value(True))
@@ -30,7 +37,12 @@ def build_status_output() raises -> Value:
     output.set("enabled_modes", modes)

     var backends = loads("{}")
-    backends.set("mode_a_deterministic", Value("partially_available"))
+    backends.set(
+        "mode_a_deterministic",
+        Value("available")
+        if all_enabled_capabilities_implemented()
+        else Value("partially_available"),
+    )
     backends.set("mode_b_model_assisted", Value("unavailable"))
     output.set("backend_reachability", backends)

@@ -41,6 +53,10 @@ def build_status_output() raises -> Value:
         Value(len(bootstrap_enabled_capabilities())),
     )
     counts.set(
+        "mode_a_implemented_business_capabilities",
+        Value(implemented_enabled_capability_count()),
+
)
+    counts.set(
         "disabled_business_capabilities",
         Value(len(deferred_capabilities())),
     )
diff --git a/src/hyf_stdio/server.mojo b/src/hyf_stdio/server.mojo
@@ -4,11 +4,13 @@ from std.sys import stdin

 from mojson import Value

+from hyf_core.capabilities.explain_result import execute_explain_result
 from hyf_core.capabilities.registry import (
     is_deferred_capability,
     is_known_business_capability,
 )
 from hyf_core.capabilities.query_rewrite import execute_query_rewrite
+from hyf_core.capabilities.semantic_rank import execute_semantic_rank
 from hyf_core.errors import CapabilityFailure, CapabilitySuccess
 from hyf_stdio.codec import decode_request, encode_error, encode_success
 from hyf_stdio.control.capabilities import build_capabilities_output
@@ -100,6 +102,30 @@ def _write_query_rewrite(request: WireRequest, request_id: String) raises:
     )


+def _write_semantic_rank(request: WireRequest, request_id: String) raises:
+    var result = execute_semantic_rank(  # called with a clone of the input and a copy of the context
+        request.input.clone(), request.context.copy()
+    )
+    if result.failure:  # write the core failure as a wire error and stop
+        _write_error(_wire_error_from_core_failure(request_id, result.failure.value()))
+        return
+    _write_success(
+        _wire_success_from_core_success(request_id, result.success.value())
+    )
+
+# Same flow as _write_semantic_rank, but dispatching to execute_explain_result.
+def _write_explain_result(request: WireRequest, request_id: String) raises:
+    var result = execute_explain_result(
+        request.input.clone(), request.context.copy()
+    )
+    if result.failure:
+        _write_error(_wire_error_from_core_failure(request_id, result.failure.value()))
+        return
+    _write_success(
+        _wire_success_from_core_success(request_id, result.success.value())
+    )
+
+
 def run_stdio_server() raises:
     if stdin.isatty():
         return
@@ -130,6 +156,10 @@ def run_stdio_server() raises:
                 )
             elif request.capability == "query_rewrite":
                 _write_query_rewrite(request^, request_id)
+            elif request.capability == "semantic_rank":
+                _write_semantic_rank(request^, request_id)
+            elif request.capability == "explain_result":
+                _write_explain_result(request^, request_id)
             elif is_deferred_capability(request.capability):
                 _write_error(_disabled_response(request))
             elif is_known_business_capability(request.capability):