ranking_support.mojo (13602B)
from std.collections import List

from json import Value
from json.deserialize import get_float, get_int, get_string

from hyf_core.capabilities.query_analysis import (
    QueryAnalysis,
    collapse_whitespace,
    has_key,
    normalize_free_text,
)
from hyf_core.request_context import RequestContext


@fieldwise_init
struct SemanticCandidate(Copyable, Movable):
    """A single listing that can be scored against a query.

    Instances built by `_parse_candidate` carry whitespace-collapsed text
    fields, a lower-cased `delivery` value and non-negative numeric fields.
    """

    # Identifier compared against scope listing ids and used as the final
    # ranking tie-breaker.
    var id: String
    # Title text; combined with `farm` for query-term matching.
    var title: String
    # Farm name; combined with `title` for query-term matching.
    var farm: String
    # Fulfillment mode; the parser only accepts "pickup" or "delivery".
    var delivery: String
    # Distance used for the distance band and as a ranking tie-breaker.
    var distance_km: Float64
    # Freshness value; lower values are treated as fresher.
    var freshness_minutes: Int


@fieldwise_init
struct CandidateEvaluation(Copyable, Movable):
    """The outcome of scoring one candidate against an analyzed query."""

    # Private copy of the candidate that was evaluated.
    var candidate: SemanticCandidate
    # Accumulated score; higher ranks first.
    var score: Int
    # Short human-readable reasons, in the order they were detected.
    var reasons: List[String]
    # Query terms that were found in the candidate's normalized text.
    var matched_terms: List[String]
    # One of "not_requested", "match" or "mismatch".
    var delivery_alignment: String
    # One of "not_considered", "closer" or "farther".
    var distance_band: String
    # One of "fresher", "standard" or "older".
    var freshness_band: String
    # True when the candidate id is listed in the request scope.
    var scope_match: Bool


@fieldwise_init
struct SemanticRankRequest(Copyable, Movable):
    """Validated input of the `semantic_rank` capability."""

    # Whitespace-collapsed, non-empty query text.
    var query_text: String
    # Non-empty list of candidates with unique ids.
    var candidates: List[SemanticCandidate]


@fieldwise_init
struct ExplainResultRequest(Copyable, Movable):
    """Validated input of the `explain_result` capability."""

    # Whitespace-collapsed, non-empty query text.
    var query_text: String
    # The single candidate to explain.
    var candidate: SemanticCandidate


def _require_object(value: Value, context: String) raises:
    """Ensure that `value` is a JSON object.

    Args:
        value: The JSON value to check.
        context: Prefix used in the error message.

    Raises:
        Error: If `value` is not a JSON object.
    """
    if not value.is_object():
        raise Error(context + " must be a JSON object")


def _require_allowed_keys(
    value: Value, allowed_keys: List[String], context: String
) raises:
    """Reject any key of `value` that is not listed in `allowed_keys`.

    Args:
        value: The JSON object whose keys are checked.
        allowed_keys: The complete set of accepted field names.
        context: Prefix used in the error message.

    Raises:
        Error: On the first key that is not in `allowed_keys`.
    """
    for key in value.object_keys():
        var allowed = False
        for allowed_key in allowed_keys:
            if key == allowed_key:
                allowed = True
                break
        if not allowed:
            raise Error(context + " contains unexpected field '" + key + "'")


def _copy_candidate(candidate: SemanticCandidate) -> SemanticCandidate:
    """Return a field-by-field copy of `candidate`.

    Args:
        candidate: The candidate to duplicate.

    Returns:
        A new `SemanticCandidate` with freshly constructed string fields.
    """
    return SemanticCandidate(
        id=String(candidate.id),
        title=String(candidate.title),
        farm=String(candidate.farm),
        delivery=String(candidate.delivery),
        distance_km=candidate.distance_km,
        freshness_minutes=candidate.freshness_minutes,
    )


def _copy_string_list(items: List[String]) -> List[String]:
    """Return a new list holding a copy of every string in `items`.

    Args:
        items: The strings to duplicate.

    Returns:
        A new list with the same strings in the same order.
    """
    var copied = List[String]()
    for item in items:
        copied.append(String(item))
    return copied^


def _copy_evaluation(evaluation: CandidateEvaluation) -> CandidateEvaluation:
    """Return a deep copy of `evaluation`.

    Args:
        evaluation: The evaluation to duplicate.

    Returns:
        A new `CandidateEvaluation` whose candidate, lists and strings are
        copied rather than shared.
    """
    return CandidateEvaluation(
        candidate=_copy_candidate(evaluation.candidate),
        score=evaluation.score,
        reasons=_copy_string_list(evaluation.reasons),
        matched_terms=_copy_string_list(evaluation.matched_terms),
        delivery_alignment=String(evaluation.delivery_alignment),
        distance_band=String(evaluation.distance_band),
        freshness_band=String(evaluation.freshness_band),
        scope_match=evaluation.scope_match,
    )


def _parse_candidate(json: Value, context: String) raises -> SemanticCandidate:
    """Validate a JSON object and convert it into a `SemanticCandidate`.

    Text fields are whitespace-collapsed; `delivery` is additionally
    lower-cased and must be "pickup" or "delivery".

    Args:
        json: The JSON value describing the candidate.
        context: Prefix used in error messages.

    Returns:
        The normalized candidate.

    Raises:
        Error: If `json` is not an object, has an unexpected field, has an
            empty `id`/`title`/`farm`/`delivery`, has an unsupported
            `delivery` value, or has a negative `distance_km` or
            `freshness_minutes`. Errors raised by `get_string`, `get_float`
            and `get_int` propagate unchanged (presumably for missing or
            mistyped fields; confirm against the json library).
    """
    _require_object(json, context)

    var allowed_keys = List[String]()
    allowed_keys.append("id")
    allowed_keys.append("title")
    allowed_keys.append("farm")
    allowed_keys.append("delivery")
    allowed_keys.append("distance_km")
    allowed_keys.append("freshness_minutes")
    _require_allowed_keys(json, allowed_keys, context)

    var id = get_string(json, "id")
    if collapse_whitespace(id) == "":
        raise Error(context + " field 'id' must not be empty")

    var title = get_string(json, "title")
    if collapse_whitespace(title) == "":
        raise Error(context + " field 'title' must not be empty")

    var farm = get_string(json, "farm")
    if collapse_whitespace(farm) == "":
        raise Error(context + " field 'farm' must not be empty")

    var delivery = get_string(json, "delivery")
    var normalized_delivery = collapse_whitespace(delivery).lower()
    if normalized_delivery == "":
        raise Error(context + " field 'delivery' must not be empty")
    if normalized_delivery != "pickup" and normalized_delivery != "delivery":
        raise Error(
            context
            + " field 'delivery' must be one of 'pickup' or 'delivery'"
        )

    var distance_km = get_float(json, "distance_km")
    if distance_km < 0.0:
        raise Error(context + " field 'distance_km' must be non-negative")

    var freshness_minutes = get_int(json, "freshness_minutes")
    if freshness_minutes < 0:
        raise Error(
            context + " field 'freshness_minutes' must be non-negative"
        )

    return SemanticCandidate(
        id=collapse_whitespace(id),
        title=collapse_whitespace(title),
        farm=collapse_whitespace(farm),
        delivery=normalized_delivery,
        distance_km=distance_km,
        freshness_minutes=freshness_minutes,
    )


def _parse_query_text(input: Value, capability_name: String) raises -> String:
    """Extract the query text from exactly one of `text` or `query`.

    Args:
        input: The capability input object.
        capability_name: Capability name used as the error-message prefix.

    Returns:
        The whitespace-collapsed query text.

    Raises:
        Error: If neither or both fields are present, the chosen field is not
            a string, or the collapsed text is empty.
    """
    var field_count = 0
    if has_key(input, "text"):
        field_count += 1
    if has_key(input, "query"):
        field_count += 1

    if field_count == 0:
        raise Error(
            capability_name + " input requires exactly one of 'text' or 'query'"
        )
    if field_count > 1:
        raise Error(
            capability_name
            + " input must provide exactly one of 'text' or 'query'"
        )

    var field_name = "text" if has_key(input, "text") else "query"
    var text_value = input[field_name]
    if not text_value.is_string():
        raise Error(
            capability_name + " input field '" + field_name + "' must be a string"
        )

    var collapsed = collapse_whitespace(text_value.string_value())
    if collapsed == "":
        raise Error(capability_name + " input text must not be empty")
    return collapsed^


def parse_semantic_rank_request(input: Value) raises -> SemanticRankRequest:
    """Parse and validate the input of the `semantic_rank` capability.

    Accepted fields are `text`, `query` and `candidates`.

    Args:
        input: The raw JSON input.

    Returns:
        The validated request.

    Raises:
        Error: If `input` is not an object, has an unexpected field, or its
            query text or candidates fail validation.
    """
    _require_object(input, "semantic_rank input")

    var allowed_keys = List[String]()
    allowed_keys.append("text")
    allowed_keys.append("query")
    allowed_keys.append("candidates")
    _require_allowed_keys(input, allowed_keys, "semantic_rank input")

    return SemanticRankRequest(
        query_text=_parse_query_text(input, "semantic_rank"),
        candidates=parse_candidate_array(input, "semantic_rank"),
    )


def parse_explain_result_request(input: Value) raises -> ExplainResultRequest:
    """Parse and validate the input of the `explain_result` capability.

    Accepted fields are `text`, `query`, `candidate` and `result`.

    Args:
        input: The raw JSON input.

    Returns:
        The validated request.

    Raises:
        Error: If `input` is not an object, has an unexpected field, or its
            query text or candidate fails validation.
    """
    _require_object(input, "explain_result input")

    var allowed_keys = List[String]()
    allowed_keys.append("text")
    allowed_keys.append("query")
    allowed_keys.append("candidate")
    allowed_keys.append("result")
    _require_allowed_keys(input, allowed_keys, "explain_result input")

    return ExplainResultRequest(
        query_text=_parse_query_text(input, "explain_result"),
        candidate=parse_single_candidate(input, "explain_result"),
    )


def parse_candidate_array(
    input: Value, capability_name: String
) raises -> List[SemanticCandidate]:
    """Parse the non-empty `candidates` array of a capability input.

    Args:
        input: The capability input object.
        capability_name: Capability name used as the error-message prefix.

    Returns:
        The parsed candidates in input order.

    Raises:
        Error: If `input` is not an object, `candidates` is missing, is not
            an array, is empty, contains an invalid candidate, or contains
            two candidates with the same (normalized) id.
    """
    _require_object(input, capability_name + " input")

    if not has_key(input, "candidates"):
        raise Error(capability_name + " input requires 'candidates'")

    var candidates_value = input["candidates"]
    if not candidates_value.is_array():
        raise Error(
            capability_name + " input field 'candidates' must be a JSON array"
        )

    var candidates = List[SemanticCandidate]()
    # Ids seen so far; compared after normalization by `_parse_candidate`.
    var seen_ids = List[String]()
    for item in candidates_value.array_items():
        var candidate = _parse_candidate(item, capability_name + " candidate")
        for seen_id in seen_ids:
            if seen_id == candidate.id:
                raise Error(
                    capability_name
                    + " input contains duplicate candidate id '"
                    + candidate.id
                    + "'"
                )
        seen_ids.append(String(candidate.id))
        candidates.append(candidate^)

    if len(candidates) == 0:
        raise Error(
            capability_name + " input field 'candidates' must not be empty"
        )

    return candidates^


def parse_single_candidate(
    input: Value, capability_name: String
) raises -> SemanticCandidate:
    """Parse the candidate given under exactly one of `candidate` or `result`.

    Args:
        input: The capability input object.
        capability_name: Capability name used as the error-message prefix.

    Returns:
        The parsed candidate.

    Raises:
        Error: If `input` is not an object, neither or both fields are
            present, or the candidate fails validation.
    """
    _require_object(input, capability_name + " input")

    var field_count = 0
    if has_key(input, "candidate"):
        field_count += 1
    if has_key(input, "result"):
        field_count += 1

    if field_count == 0:
        raise Error(
            capability_name + " input requires 'candidate' or 'result'"
        )
    if field_count > 1:
        raise Error(
            capability_name
            + " input must not include both 'candidate' and 'result'"
        )

    if has_key(input, "candidate"):
        return _parse_candidate(
            input["candidate"], capability_name + " candidate"
        )

    return _parse_candidate(input["result"], capability_name + " result")


def _normalize_candidate_text(candidate: SemanticCandidate) -> String:
    """Return the normalized "title farm" text used for term matching.

    Args:
        candidate: The candidate whose title and farm are combined.

    Returns:
        The result of `normalize_free_text` on the combined text.
    """
    # `normalize_free_text` takes a signals list; whatever it records there
    # is not used by this module.
    var signals = List[String]()
    return normalize_free_text(
        candidate.title + " " + candidate.farm, signals
    )


def _display_term(term: String) -> String:
    """Return the display form of a matched term (currently a plain copy).

    Args:
        term: The matched query term.

    Returns:
        A copy of `term`.
    """
    return String(term)


def _delivery_alignment(
    query_delivery: String, candidate_delivery: String
) -> String:
    """Classify how a candidate's delivery mode relates to the query's.

    Args:
        query_delivery: Fulfillment extracted from the query; "unspecified"
            means the query did not ask for one.
        candidate_delivery: The candidate's delivery mode.

    Returns:
        "not_requested", "match" or "mismatch".
    """
    if query_delivery == "unspecified":
        return "not_requested"
    if query_delivery == candidate_delivery:
        return "match"
    return "mismatch"


def _distance_band(local_intent: Bool, distance_km: Float64) -> String:
    """Classify a candidate's distance when the query has local intent.

    Args:
        local_intent: Whether the query expressed local intent.
        distance_km: The candidate's distance.

    Returns:
        "not_considered" without local intent, "closer" for distances up to
        and including 5.0, otherwise "farther".
    """
    if not local_intent:
        return "not_considered"
    if distance_km <= 5.0:
        return "closer"
    return "farther"


def _freshness_band(freshness_minutes: Int) -> String:
    """Classify a candidate's freshness.

    Args:
        freshness_minutes: The candidate's freshness value.

    Returns:
        "fresher" up to and including 10, "standard" up to and including 30,
        otherwise "older".
    """
    if freshness_minutes <= 10:
        return "fresher"
    if freshness_minutes <= 30:
        return "standard"
    return "older"


def _scope_match(candidate: SemanticCandidate, context: RequestContext) -> Bool:
    """Report whether the candidate id is listed in the request scope.

    Args:
        candidate: The candidate to look up.
        context: The request context carrying an optional scope.

    Returns:
        True if a scope is present and its `listing_ids` contain the
        candidate id; False otherwise.
    """
    if not context.scope:
        return False

    var scope = context.scope.value().copy()
    for listing_id in scope.listing_ids:
        if listing_id == candidate.id:
            return True
    return False


def evaluate_candidate(
    candidate: SemanticCandidate,
    analysis: QueryAnalysis,
    context: RequestContext,
) -> CandidateEvaluation:
    """Score one candidate against an analyzed query.

    Score contributions: scope match +40; each matched query term +30;
    delivery match +35 / mismatch -20; distance band "closer" +20 /
    "farther" +5; freshness "fresher" +15 / "older" -5; "weekend" time
    window +2.

    Args:
        candidate: The candidate to score.
        analysis: The analyzed query (terms and extracted filters).
        context: The request context, used for scope matching.

    Returns:
        The evaluation, holding a copy of the candidate, the score, the
        reasons, the matched terms and the derived bands.
    """
    var reasons = List[String]()
    var matched_terms = List[String]()
    var score = 0

    var normalized_candidate_text = _normalize_candidate_text(candidate)
    var scope_match = _scope_match(candidate, context)
    if scope_match:
        reasons.append("scope match")
        score += 40

    # Tokenize the candidate text once; it does not change per query term.
    var candidate_terms = normalized_candidate_text.split()
    for query_term in analysis.query_terms:
        var matched = False
        for candidate_term in candidate_terms:
            if String(candidate_term) == query_term:
                matched = True
                break
        if matched:
            matched_terms.append(String(query_term))
            score += 30

    # Only the first matched term is surfaced as a reason.
    if len(matched_terms) > 0:
        reasons.append(_display_term(matched_terms[0]) + " match")

    var delivery_alignment = _delivery_alignment(
        analysis.extracted_filters.fulfillment, candidate.delivery
    )
    if delivery_alignment == "match":
        reasons.append(candidate.delivery + " match")
        score += 35
    elif delivery_alignment == "mismatch":
        reasons.append("delivery mismatch")
        score -= 20

    var distance_band = _distance_band(
        analysis.extracted_filters.local_intent, candidate.distance_km
    )
    if distance_band == "closer":
        reasons.append("closer")
        score += 20
    elif distance_band == "farther":
        reasons.append("farther")
        score += 5

    var freshness_band = _freshness_band(candidate.freshness_minutes)
    if freshness_band == "fresher":
        reasons.append("fresher")
        score += 15
    elif freshness_band == "older":
        # Penalized without adding a reason.
        score -= 5

    if analysis.extracted_filters.time_window == "weekend":
        score += 2

    return CandidateEvaluation(
        candidate=_copy_candidate(candidate),
        score=score,
        reasons=reasons^,
        matched_terms=matched_terms^,
        delivery_alignment=delivery_alignment,
        distance_band=distance_band,
        freshness_band=freshness_band,
        scope_match=scope_match,
    )


def _should_precede(
    pending: CandidateEvaluation, existing: CandidateEvaluation
) -> Bool:
    """Decide whether `pending` ranks strictly before `existing`.

    Comparison order: higher score, then scope match, then more matched
    terms, then smaller distance, then smaller freshness value, then
    smaller candidate id.

    Args:
        pending: The evaluation being inserted.
        existing: An evaluation already in the ranking.

    Returns:
        True if `pending` should be placed before `existing`.
    """
    if pending.score != existing.score:
        return pending.score > existing.score

    if pending.scope_match != existing.scope_match:
        return pending.scope_match

    if len(pending.matched_terms) != len(existing.matched_terms):
        return len(pending.matched_terms) > len(existing.matched_terms)

    if pending.candidate.distance_km != existing.candidate.distance_km:
        return pending.candidate.distance_km < existing.candidate.distance_km

    if (
        pending.candidate.freshness_minutes
        != existing.candidate.freshness_minutes
    ):
        return (
            pending.candidate.freshness_minutes
            < existing.candidate.freshness_minutes
        )

    return pending.candidate.id < existing.candidate.id


def rank_candidates(
    candidates: List[SemanticCandidate],
    analysis: QueryAnalysis,
    context: RequestContext,
) -> List[CandidateEvaluation]:
    """Evaluate every candidate and return the evaluations best-first.

    Each evaluation is placed before the first already-ranked entry it
    should precede according to `_should_precede`, or at the end if there
    is none.

    Args:
        candidates: The candidates to rank.
        analysis: The analyzed query.
        context: The request context, used for scope matching.

    Returns:
        One evaluation per candidate, in ranking order.
    """
    var ranked = List[CandidateEvaluation]()
    for candidate in candidates:
        var pending = evaluate_candidate(candidate, analysis, context)

        # Default to the end; `insert` at `len(ranked)` behaves like append.
        var insert_at = len(ranked)
        for index in range(len(ranked)):
            if _should_precede(pending, ranked[index]):
                insert_at = index
                break

        # Move the evaluation into place instead of rebuilding and
        # deep-copying the whole ranking for every candidate.
        ranked.insert(insert_at, pending^)
    return ranked^