hyf

Context-aware query service for Radroots
git clone https://radroots.dev/git/hyf.git
Log | Files | Refs | README | LICENSE

ranking_support.mojo (13602B)


      1 from std.collections import List
      2 
      3 from json import Value
      4 from json.deserialize import get_float, get_int, get_string
      5 
      6 from hyf_core.capabilities.query_analysis import (
      7     QueryAnalysis,
      8     collapse_whitespace,
      9     has_key,
     10     normalize_free_text,
     11 )
     12 from hyf_core.request_context import RequestContext
     13 
     14 
# A single listing that can be scored against a query.
#
# Instances produced by `_parse_candidate` in this file carry
# whitespace-collapsed `id`/`title`/`farm`, a lowercase `delivery` that is
# either "pickup" or "delivery", and non-negative numeric fields. Instances
# built directly through the fieldwise initializer get no such validation.
@fieldwise_init
struct SemanticCandidate(Copyable, Movable):
    # Identifier; compared against scope listing ids and used as the final
    # ranking tie-break.
    var id: String
    # Title text; combined with `farm` when matching query terms.
    var title: String
    # Farm name; combined with `title` when matching query terms.
    var farm: String
    # Fulfillment mode compared against the query's fulfillment filter.
    var delivery: String
    # Distance in kilometres (per the field name); 5.0 or less is banded as
    # "closer" when the query has local intent.
    var distance_km: Float64
    # Presumably the age of the listing data in minutes (TODO confirm);
    # 10 or less is banded "fresher", 30 or less "standard", otherwise "older".
    var freshness_minutes: Int
     23 
     24 
# The outcome of scoring one candidate, as produced by `evaluate_candidate`.
@fieldwise_init
struct CandidateEvaluation(Copyable, Movable):
    # Copy of the candidate that was evaluated.
    var candidate: SemanticCandidate
    # Sum of the bonuses and penalties applied; higher ranks first.
    var score: Int
    # Short human-readable labels for the signals that fired
    # (e.g. "scope match", "closer", "fresher").
    var reasons: List[String]
    # Query terms that were found among the candidate's normalized terms.
    var matched_terms: List[String]
    # One of "not_requested", "match" or "mismatch".
    var delivery_alignment: String
    # One of "not_considered", "closer" or "farther".
    var distance_band: String
    # One of "fresher", "standard" or "older".
    var freshness_band: String
    # True when the candidate id appears in the request scope's listing ids.
    var scope_match: Bool
     35 
     36 
# Parsed input of the semantic_rank capability
# (see `parse_semantic_rank_request`).
@fieldwise_init
struct SemanticRankRequest(Copyable, Movable):
    # Whitespace-collapsed query text taken from the 'text' or 'query' field.
    var query_text: String
    # Candidates to rank; the parser rejects empty arrays and duplicate ids.
    var candidates: List[SemanticCandidate]
     41 
     42 
# Parsed input of the explain_result capability
# (see `parse_explain_result_request`).
@fieldwise_init
struct ExplainResultRequest(Copyable, Movable):
    # Whitespace-collapsed query text taken from the 'text' or 'query' field.
    var query_text: String
    # The single candidate to explain, read from 'candidate' or 'result'.
    var candidate: SemanticCandidate
     47 
     48 
     49 def _require_object(value: Value, context: String) raises:
     50     if not value.is_object():
     51         raise Error(context + " must be a JSON object")
     52 
     53 
     54 def _require_allowed_keys(
     55     value: Value, allowed_keys: List[String], context: String
     56 ) raises:
     57     for key in value.object_keys():
     58         var allowed = False
     59         for allowed_key in allowed_keys:
     60             if key == allowed_key:
     61                 allowed = True
     62                 break
     63         if not allowed:
     64             raise Error(context + " contains unexpected field '" + key + "'")
     65 
     66 
     67 def _copy_candidate(candidate: SemanticCandidate) -> SemanticCandidate:
     68     return SemanticCandidate(
     69         id=String(candidate.id),
     70         title=String(candidate.title),
     71         farm=String(candidate.farm),
     72         delivery=String(candidate.delivery),
     73         distance_km=candidate.distance_km,
     74         freshness_minutes=candidate.freshness_minutes,
     75     )
     76 
     77 
     78 def _copy_string_list(items: List[String]) -> List[String]:
     79     var copied = List[String]()
     80     for item in items:
     81         copied.append(String(item))
     82     return copied^
     83 
     84 
     85 def _copy_evaluation(evaluation: CandidateEvaluation) -> CandidateEvaluation:
     86     return CandidateEvaluation(
     87         candidate=_copy_candidate(evaluation.candidate),
     88         score=evaluation.score,
     89         reasons=_copy_string_list(evaluation.reasons),
     90         matched_terms=_copy_string_list(evaluation.matched_terms),
     91         delivery_alignment=String(evaluation.delivery_alignment),
     92         distance_band=String(evaluation.distance_band),
     93         freshness_band=String(evaluation.freshness_band),
     94         scope_match=evaluation.scope_match,
     95     )
     96 
     97 
     98 def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate:
     99     _require_object(json, context)
    100 
    101     var allowed_keys = List[String]()
    102     allowed_keys.append("id")
    103     allowed_keys.append("title")
    104     allowed_keys.append("farm")
    105     allowed_keys.append("delivery")
    106     allowed_keys.append("distance_km")
    107     allowed_keys.append("freshness_minutes")
    108     _require_allowed_keys(json, allowed_keys, context)
    109 
    110     var id = get_string(json, "id")
    111     if collapse_whitespace(id) == "":
    112         raise Error(context + " field 'id' must not be empty")
    113 
    114     var title = get_string(json, "title")
    115     if collapse_whitespace(title) == "":
    116         raise Error(context + " field 'title' must not be empty")
    117 
    118     var farm = get_string(json, "farm")
    119     if collapse_whitespace(farm) == "":
    120         raise Error(context + " field 'farm' must not be empty")
    121 
    122     var delivery = get_string(json, "delivery")
    123     var normalized_delivery = collapse_whitespace(delivery).lower()
    124     if normalized_delivery == "":
    125         raise Error(context + " field 'delivery' must not be empty")
    126     if normalized_delivery != "pickup" and normalized_delivery != "delivery":
    127         raise Error(
    128             context
    129             + " field 'delivery' must be one of 'pickup' or 'delivery'"
    130         )
    131 
    132     var distance_km = get_float(json, "distance_km")
    133     if distance_km < 0.0:
    134         raise Error(context + " field 'distance_km' must be non-negative")
    135 
    136     var freshness_minutes = get_int(json, "freshness_minutes")
    137     if freshness_minutes < 0:
    138         raise Error(
    139             context + " field 'freshness_minutes' must be non-negative"
    140         )
    141 
    142     return SemanticCandidate(
    143         id=collapse_whitespace(id),
    144         title=collapse_whitespace(title),
    145         farm=collapse_whitespace(farm),
    146         delivery=normalized_delivery,
    147         distance_km=distance_km,
    148         freshness_minutes=freshness_minutes,
    149     )
    150 
    151 
    152 def _parse_query_text(input: Value, capability_name: String) raises -> String:
    153     var field_count = 0
    154     if has_key(input, "text"):
    155         field_count += 1
    156     if has_key(input, "query"):
    157         field_count += 1
    158 
    159     if field_count == 0:
    160         raise Error(
    161             capability_name + " input requires exactly one of 'text' or 'query'"
    162         )
    163     if field_count > 1:
    164         raise Error(
    165             capability_name
    166             + " input must provide exactly one of 'text' or 'query'"
    167         )
    168 
    169     var field_name = "text" if has_key(input, "text") else "query"
    170     var text_value = input[field_name]
    171     if not text_value.is_string():
    172         raise Error(
    173             capability_name + " input field '" + field_name + "' must be a string"
    174         )
    175 
    176     var collapsed = collapse_whitespace(text_value.string_value())
    177     if collapsed == "":
    178         raise Error(capability_name + " input text must not be empty")
    179     return collapsed^
    180 
    181 
    182 def parse_semantic_rank_request(input: Value) raises -> SemanticRankRequest:
    183     _require_object(input, "semantic_rank input")
    184 
    185     var allowed_keys = List[String]()
    186     allowed_keys.append("text")
    187     allowed_keys.append("query")
    188     allowed_keys.append("candidates")
    189     _require_allowed_keys(input, allowed_keys, "semantic_rank input")
    190 
    191     return SemanticRankRequest(
    192         query_text=_parse_query_text(input, "semantic_rank"),
    193         candidates=parse_candidate_array(input, "semantic_rank"),
    194     )
    195 
    196 
    197 def parse_explain_result_request(input: Value) raises -> ExplainResultRequest:
    198     _require_object(input, "explain_result input")
    199 
    200     var allowed_keys = List[String]()
    201     allowed_keys.append("text")
    202     allowed_keys.append("query")
    203     allowed_keys.append("candidate")
    204     allowed_keys.append("result")
    205     _require_allowed_keys(input, allowed_keys, "explain_result input")
    206 
    207     return ExplainResultRequest(
    208         query_text=_parse_query_text(input, "explain_result"),
    209         candidate=parse_single_candidate(input, "explain_result"),
    210     )
    211 
    212 
    213 def parse_candidate_array(
    214     input: Value, capability_name: String
    215 ) raises -> List[SemanticCandidate]:
    216     _require_object(input, capability_name + " input")
    217 
    218     if not has_key(input, "candidates"):
    219         raise Error(capability_name + " input requires 'candidates'")
    220 
    221     var candidates_value = input["candidates"]
    222     if not candidates_value.is_array():
    223         raise Error(
    224             capability_name + " input field 'candidates' must be a JSON array"
    225         )
    226 
    227     var candidates = List[SemanticCandidate]()
    228     var seen_ids = List[String]()
    229     for item in candidates_value.array_items():
    230         var candidate = _parse_candidate(item, capability_name + " candidate")
    231         for seen_id in seen_ids:
    232             if seen_id == candidate.id:
    233                 raise Error(
    234                     capability_name
    235                     + " input contains duplicate candidate id '"
    236                     + candidate.id
    237                     + "'"
    238                 )
    239         seen_ids.append(String(candidate.id))
    240         candidates.append(candidate^)
    241 
    242     if len(candidates) == 0:
    243         raise Error(
    244             capability_name + " input field 'candidates' must not be empty"
    245         )
    246 
    247     return candidates^
    248 
    249 
    250 def parse_single_candidate(
    251     input: Value, capability_name: String
    252 ) raises -> SemanticCandidate:
    253     _require_object(input, capability_name + " input")
    254 
    255     var field_count = 0
    256     if has_key(input, "candidate"):
    257         field_count += 1
    258     if has_key(input, "result"):
    259         field_count += 1
    260 
    261     if field_count == 0:
    262         raise Error(
    263             capability_name + " input requires 'candidate' or 'result'"
    264         )
    265     if field_count > 1:
    266         raise Error(
    267             capability_name
    268             + " input must not include both 'candidate' and 'result'"
    269         )
    270 
    271     if has_key(input, "candidate"):
    272         return _parse_candidate(
    273             input["candidate"], capability_name + " candidate"
    274         )
    275 
    276     return _parse_candidate(input["result"], capability_name + " result")
    277 
    278 
    279 def _normalize_candidate_text(candidate: SemanticCandidate) -> String:
    280     var signals = List[String]()
    281     return normalize_free_text(
    282         candidate.title + " " + candidate.farm, signals
    283     )
    284 
    285 
    286 def _display_term(term: String) -> String:
    287     return String(term)
    288 
    289 
    290 def _delivery_alignment(
    291     query_delivery: String, candidate_delivery: String
    292 ) -> String:
    293     if query_delivery == "unspecified":
    294         return "not_requested"
    295     if query_delivery == candidate_delivery:
    296         return "match"
    297     return "mismatch"
    298 
    299 
    300 def _distance_band(local_intent: Bool, distance_km: Float64) -> String:
    301     if not local_intent:
    302         return "not_considered"
    303     if distance_km <= 5.0:
    304         return "closer"
    305     return "farther"
    306 
    307 
    308 def _freshness_band(freshness_minutes: Int) -> String:
    309     if freshness_minutes <= 10:
    310         return "fresher"
    311     if freshness_minutes <= 30:
    312         return "standard"
    313     return "older"
    314 
    315 
    316 def _scope_match(candidate: SemanticCandidate, context: RequestContext) -> Bool:
    317     if not context.scope:
    318         return False
    319 
    320     var scope = context.scope.value().copy()
    321     for listing_id in scope.listing_ids:
    322         if listing_id == candidate.id:
    323             return True
    324     return False
    325 
    326 
    327 def evaluate_candidate(
    328     candidate: SemanticCandidate,
    329     analysis: QueryAnalysis,
    330     context: RequestContext,
    331 ) -> CandidateEvaluation:
    332     var reasons = List[String]()
    333     var matched_terms = List[String]()
    334     var score = 0
    335 
    336     var normalized_candidate_text = _normalize_candidate_text(candidate)
    337     var scope_match = _scope_match(candidate, context)
    338     if scope_match:
    339         reasons.append("scope match")
    340         score += 40
    341 
    342     for query_term in analysis.query_terms:
    343         var matched = False
    344         for candidate_term in normalized_candidate_text.split():
    345             if String(candidate_term) == query_term:
    346                 matched = True
    347                 break
    348         if matched:
    349             matched_terms.append(String(query_term))
    350             score += 30
    351 
    352     if len(matched_terms) > 0:
    353         reasons.append(_display_term(matched_terms[0]) + " match")
    354 
    355     var delivery_alignment = _delivery_alignment(
    356         analysis.extracted_filters.fulfillment, candidate.delivery
    357     )
    358     if delivery_alignment == "match":
    359         reasons.append(candidate.delivery + " match")
    360         score += 35
    361     elif delivery_alignment == "mismatch":
    362         reasons.append("delivery mismatch")
    363         score -= 20
    364 
    365     var distance_band = _distance_band(
    366         analysis.extracted_filters.local_intent, candidate.distance_km
    367     )
    368     if distance_band == "closer":
    369         reasons.append("closer")
    370         score += 20
    371     elif distance_band == "farther":
    372         reasons.append("farther")
    373         score += 5
    374 
    375     var freshness_band = _freshness_band(candidate.freshness_minutes)
    376     if freshness_band == "fresher":
    377         reasons.append("fresher")
    378         score += 15
    379     elif freshness_band == "older":
    380         score -= 5
    381 
    382     if analysis.extracted_filters.time_window == "weekend":
    383         score += 2
    384 
    385     return CandidateEvaluation(
    386         candidate=_copy_candidate(candidate),
    387         score=score,
    388         reasons=reasons^,
    389         matched_terms=matched_terms^,
    390         delivery_alignment=delivery_alignment,
    391         distance_band=distance_band,
    392         freshness_band=freshness_band,
    393         scope_match=scope_match,
    394     )
    395 
    396 
    397 def _should_precede(
    398     pending: CandidateEvaluation, existing: CandidateEvaluation
    399 ) -> Bool:
    400     if pending.score != existing.score:
    401         return pending.score > existing.score
    402 
    403     if pending.scope_match != existing.scope_match:
    404         return pending.scope_match
    405 
    406     if len(pending.matched_terms) != len(existing.matched_terms):
    407         return len(pending.matched_terms) > len(existing.matched_terms)
    408 
    409     if pending.candidate.distance_km != existing.candidate.distance_km:
    410         return pending.candidate.distance_km < existing.candidate.distance_km
    411 
    412     if (
    413         pending.candidate.freshness_minutes
    414         != existing.candidate.freshness_minutes
    415     ):
    416         return (
    417             pending.candidate.freshness_minutes
    418             < existing.candidate.freshness_minutes
    419         )
    420 
    421     return pending.candidate.id < existing.candidate.id
    422 
    423 
    424 def rank_candidates(
    425     candidates: List[SemanticCandidate],
    426     analysis: QueryAnalysis,
    427     context: RequestContext,
    428 ) -> List[CandidateEvaluation]:
    429     var ranked = List[CandidateEvaluation]()
    430     for candidate in candidates:
    431         var pending = evaluate_candidate(candidate, analysis, context)
    432         var updated = List[CandidateEvaluation]()
    433         var inserted = False
    434         for existing in ranked:
    435             if not inserted and _should_precede(pending, existing):
    436                 updated.append(_copy_evaluation(pending))
    437                 inserted = True
    438             updated.append(_copy_evaluation(existing))
    439         if not inserted:
    440             updated.append(_copy_evaluation(pending))
    441         ranked = updated^
    442     return ranked^