tangle


git clone https://radroots.dev/git/tangle.git
Log | Files | Refs | README | LICENSE

commit 00f506fe318e69a0e4933513c061e3d49215ac4b
parent 194529f5d21901c3e6bc9f7a34fb3f2c1da52b6b
Author: triesap <tyson@radroots.org>
Date:   Sun, 14 Jun 2026 13:40:14 -0700

bench: add explicit profile claim gates

- Add smoke, medium, and production benchmark profiles with named threshold sources.
- Allow strict JSON threshold overrides while rejecting missing, unknown, and zero fields.
- Record profile, threshold source, and production-claim eligibility in benchmark summaries.
- Require target-hardware evidence before production-profile runs can be production-claim eligible.

Diffstat:
M crates/tangle_bench/src/bin/tangle_benchmark_report.rs | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
M crates/tangle_bench/src/lib.rs | 401 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 500 insertions(+), 21 deletions(-)

diff --git a/crates/tangle_bench/src/bin/tangle_benchmark_report.rs b/crates/tangle_bench/src/bin/tangle_benchmark_report.rs @@ -5,13 +5,17 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::{SystemTime, UNIX_EPOCH}; -use tangle_bench::{BenchDatasetConfig, BenchmarkRunReport}; +use tangle_bench::{ + BenchDatasetConfig, BenchmarkProfile, BenchmarkProfileName, BenchmarkRunReport, + BenchmarkThresholds, +}; use tangle_runtime::nip11::supported_nips_for_group_capability; +#[derive(Debug)] struct BenchmarkReportArgs { output_root: PathBuf, run_id: String, - config: BenchDatasetConfig, + profile: BenchmarkProfile, } fn main() { @@ -33,7 +37,7 @@ fn run() -> Result<Option<PathBuf>, String> { let artifact_dir = args.output_root.join(&args.run_id); fs::create_dir_all(&artifact_dir).map_err(|error| error.to_string())?; - let report = BenchmarkRunReport::run(args.config)?; + let report = BenchmarkRunReport::run(args.profile)?; let dataset_path = artifact_dir.join("dataset-events.jsonl"); fs::write( &dataset_path, @@ -68,7 +72,11 @@ impl BenchmarkReportArgs { fn parse(args: impl IntoIterator<Item = String>) -> Result<Option<Self>, String> { let mut output_root = PathBuf::from(".local/tangle/benchmarks"); let mut run_id = None; + let mut profile_name = BenchmarkProfileName::Smoke; let mut config = BenchDatasetConfig::smoke(); + let mut dataset_overridden = false; + let mut thresholds_json = None; + let mut target_hardware_evidence = None; let mut args = args.into_iter(); while let Some(arg) = args.next() { match arg.as_str() { @@ -78,22 +86,41 @@ impl BenchmarkReportArgs { "--run-id" => { run_id = Some(require_value("--run-id", args.next())?); } + "--profile" => { + profile_name = + BenchmarkProfileName::parse(&require_value("--profile", args.next())?)?; + } + "--thresholds-json" => { + thresholds_json = Some(PathBuf::from(require_value( + "--thresholds-json", + args.next(), + )?)); + } + "--target-hardware-evidence" => { + 
target_hardware_evidence = + Some(require_value("--target-hardware-evidence", args.next())?); + } "--group-count" => { config.group_count = parse_count("--group-count", args.next())?; + dataset_overridden = true; } "--public-events-per-group" => { config.public_events_per_group = parse_count("--public-events-per-group", args.next())?; + dataset_overridden = true; } "--private-events-per-group" => { config.private_events_per_group = parse_count("--private-events-per-group", args.next())?; + dataset_overridden = true; } "--public-note-count" => { config.public_note_count = parse_count("--public-note-count", args.next())?; + dataset_overridden = true; } "--member-count" => { config.member_count = parse_count("--member-count", args.next())?; + dataset_overridden = true; } "--help" => return Ok(None), other => return Err(format!("unsupported argument `{other}`")), @@ -101,11 +128,29 @@ impl BenchmarkReportArgs { } let run_id = run_id.unwrap_or_else(default_run_id); validate_run_id(&run_id)?; - let config = config.validate()?; + if dataset_overridden && profile_name != BenchmarkProfileName::Smoke { + return Err( + "dataset size overrides are only supported with the smoke profile".to_owned(), + ); + } + let mut profile = BenchmarkProfile::from_name(profile_name); + if dataset_overridden { + profile = profile.with_dataset_config(config)?; + } + if let Some(path) = thresholds_json { + let raw = fs::read_to_string(&path) + .map_err(|error| format!("failed to read thresholds JSON: {error}"))?; + let thresholds = BenchmarkThresholds::from_json_str(&raw)?; + profile = + profile.with_thresholds(thresholds, format!("file:{}", path_string(&path)))?; + } + if let Some(evidence) = target_hardware_evidence { + profile = profile.with_target_hardware_evidence(evidence)?; + } Ok(Some(Self { output_root, run_id, - config, + profile, })) } } @@ -169,10 +214,73 @@ fn path_string(path: &Path) -> String { fn help_text() -> String { [ - "usage: tangle-benchmark-report [--output-root PATH] 
[--run-id ID]", + "usage: tangle-benchmark-report [--output-root PATH] [--run-id ID] [--profile smoke|medium|production]", + " [--thresholds-json PATH] [--target-hardware-evidence TEXT]", " [--group-count COUNT] [--public-events-per-group COUNT]", " [--private-events-per-group COUNT] [--public-note-count COUNT]", " [--member-count COUNT]", ] .join("\n") } + +#[cfg(test)] +mod tests { + use super::BenchmarkReportArgs; + use tangle_bench::{BenchDatasetConfig, BenchmarkProfileName}; + + #[test] + fn benchmark_report_args_default_to_smoke_profile() { + let args = BenchmarkReportArgs::parse(["--run-id".to_owned(), "unit".to_owned()]) + .expect("parse") + .expect("args"); + + assert_eq!(args.profile.name(), BenchmarkProfileName::Smoke); + assert_eq!(args.profile.dataset_config(), BenchDatasetConfig::smoke()); + assert_eq!(args.profile.threshold_source(), "builtin:smoke"); + } + + #[test] + fn benchmark_report_args_reject_unknown_profile() { + let error = BenchmarkReportArgs::parse([ + "--profile".to_owned(), + "tiny".to_owned(), + "--run-id".to_owned(), + "unit".to_owned(), + ]) + .expect_err("unknown profile"); + + assert!(error.contains("unknown benchmark profile")); + } + + #[test] + fn benchmark_report_args_reject_dataset_overrides_for_non_smoke_profiles() { + let error = BenchmarkReportArgs::parse([ + "--profile".to_owned(), + "medium".to_owned(), + "--group-count".to_owned(), + "3".to_owned(), + "--run-id".to_owned(), + "unit".to_owned(), + ]) + .expect_err("non-smoke override"); + + assert!(error.contains("dataset size overrides")); + } + + #[test] + fn benchmark_report_args_accept_production_target_hardware_evidence() { + let args = BenchmarkReportArgs::parse([ + "--profile".to_owned(), + "production".to_owned(), + "--target-hardware-evidence".to_owned(), + "target-hardware:prod-node-001".to_owned(), + "--run-id".to_owned(), + "unit".to_owned(), + ]) + .expect("parse") + .expect("args"); + + assert_eq!(args.profile.name(), BenchmarkProfileName::Production); + 
assert!(args.profile.production_claim_eligible()); + } +} diff --git a/crates/tangle_bench/src/lib.rs b/crates/tangle_bench/src/lib.rs @@ -60,6 +60,14 @@ impl BenchDatasetConfig { Self::new(6, 4, 3, 6, 3) } + pub fn medium() -> Self { + Self::new(24, 8, 6, 24, 5) + } + + pub fn production() -> Self { + Self::new(120, 24, 16, 120, 12) + } + pub fn validate(self) -> Result<Self, String> { if self.group_count < 3 { return Err("group-count must be at least 3".to_owned()); @@ -78,6 +86,150 @@ impl BenchDatasetConfig { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BenchmarkProfileName { + Smoke, + Medium, + Production, +} + +impl BenchmarkProfileName { + pub fn parse(value: &str) -> Result<Self, String> { + match value { + "smoke" => Ok(Self::Smoke), + "medium" => Ok(Self::Medium), + "production" => Ok(Self::Production), + _ => Err(format!( + "unknown benchmark profile `{value}`; expected smoke, medium, or production" + )), + } + } + + pub fn as_str(self) -> &'static str { + match self { + Self::Smoke => "smoke", + Self::Medium => "medium", + Self::Production => "production", + } + } + + pub fn all() -> [Self; 3] { + [Self::Smoke, Self::Medium, Self::Production] + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BenchmarkProfile { + name: BenchmarkProfileName, + dataset_config: BenchDatasetConfig, + thresholds: BenchmarkThresholds, + threshold_source: String, + target_hardware_evidence: Option<String>, +} + +impl BenchmarkProfile { + pub fn from_name(name: BenchmarkProfileName) -> Self { + match name { + BenchmarkProfileName::Smoke => Self::smoke(), + BenchmarkProfileName::Medium => Self::medium(), + BenchmarkProfileName::Production => Self::production(), + } + } + + pub fn smoke() -> Self { + Self::new( + BenchmarkProfileName::Smoke, + BenchDatasetConfig::smoke(), + BenchmarkThresholds::smoke(), + ) + } + + pub fn medium() -> Self { + Self::new( + BenchmarkProfileName::Medium, + BenchDatasetConfig::medium(), + BenchmarkThresholds::medium(), + ) + } 
+ + pub fn production() -> Self { + Self::new( + BenchmarkProfileName::Production, + BenchDatasetConfig::production(), + BenchmarkThresholds::production(), + ) + } + + fn new( + name: BenchmarkProfileName, + dataset_config: BenchDatasetConfig, + thresholds: BenchmarkThresholds, + ) -> Self { + Self { + name, + dataset_config, + thresholds, + threshold_source: format!("builtin:{}", name.as_str()), + target_hardware_evidence: None, + } + } + + pub fn name(&self) -> BenchmarkProfileName { + self.name + } + + pub fn dataset_config(&self) -> BenchDatasetConfig { + self.dataset_config + } + + pub fn thresholds(&self) -> BenchmarkThresholds { + self.thresholds + } + + pub fn threshold_source(&self) -> &str { + &self.threshold_source + } + + pub fn target_hardware_evidence(&self) -> Option<&str> { + self.target_hardware_evidence.as_deref() + } + + pub fn with_dataset_config(mut self, config: BenchDatasetConfig) -> Result<Self, String> { + self.dataset_config = config.validate()?; + Ok(self) + } + + pub fn with_thresholds( + mut self, + thresholds: BenchmarkThresholds, + source: impl Into<String>, + ) -> Result<Self, String> { + let source = source.into(); + if source.is_empty() { + return Err("benchmark threshold source must not be empty".to_owned()); + } + self.thresholds = thresholds; + self.threshold_source = source; + Ok(self) + } + + pub fn with_target_hardware_evidence( + mut self, + evidence: impl Into<String>, + ) -> Result<Self, String> { + let evidence = evidence.into(); + if evidence.is_empty() { + return Err("target hardware evidence must not be empty".to_owned()); + } + self.target_hardware_evidence = Some(evidence); + Ok(self) + } + + pub fn production_claim_eligible(&self) -> bool { + self.name == BenchmarkProfileName::Production && self.target_hardware_evidence.is_some() + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BenchGroupVisibility { Public, Private, @@ -459,7 +611,57 @@ impl BenchmarkThresholds { } } - fn to_json(self) -> 
serde_json::Value { + pub fn medium() -> Self { + Self { + pocket_query_p95_micros: 2_500_000, + read_gate_p95_micros: 2_500_000, + projection_rebuild_elapsed_micros: 15_000_000, + outbox_replay_elapsed_micros: 15_000_000, + broadcast_lag_p95_micros: 2_500_000, + memory_profile_max_bytes: 768 * 1024 * 1024, + } + } + + pub fn production() -> Self { + Self { + pocket_query_p95_micros: 5_000_000, + read_gate_p95_micros: 5_000_000, + projection_rebuild_elapsed_micros: 60_000_000, + outbox_replay_elapsed_micros: 60_000_000, + broadcast_lag_p95_micros: 5_000_000, + memory_profile_max_bytes: 1024 * 1024 * 1024, + } + } + + pub fn from_json_str(raw: &str) -> Result<Self, String> { + let value = serde_json::from_str::<serde_json::Value>(raw) + .map_err(|error| format!("benchmark thresholds JSON is invalid: {error}"))?; + Self::from_json_value(&value) + } + + pub fn from_json_value(value: &serde_json::Value) -> Result<Self, String> { + let object = value + .as_object() + .ok_or_else(|| "benchmark thresholds JSON must be an object".to_owned())?; + for key in object.keys() { + if !benchmark_threshold_fields().contains(&key.as_str()) { + return Err(format!("unknown benchmark threshold field `{key}`")); + } + } + Ok(Self { + pocket_query_p95_micros: threshold_u64(value, "pocket_query_p95_micros")?, + read_gate_p95_micros: threshold_u64(value, "read_gate_p95_micros")?, + projection_rebuild_elapsed_micros: threshold_u64( + value, + "projection_rebuild_elapsed_micros", + )?, + outbox_replay_elapsed_micros: threshold_u64(value, "outbox_replay_elapsed_micros")?, + broadcast_lag_p95_micros: threshold_u64(value, "broadcast_lag_p95_micros")?, + memory_profile_max_bytes: threshold_u64(value, "memory_profile_max_bytes")?, + }) + } + + pub fn to_json(self) -> serde_json::Value { json!({ "pocket_query_p95_micros": self.pocket_query_p95_micros, "read_gate_p95_micros": self.read_gate_p95_micros, @@ -471,19 +673,47 @@ impl BenchmarkThresholds { } } +fn benchmark_threshold_fields() -> 
[&'static str; 6] { + [ + "pocket_query_p95_micros", + "read_gate_p95_micros", + "projection_rebuild_elapsed_micros", + "outbox_replay_elapsed_micros", + "broadcast_lag_p95_micros", + "memory_profile_max_bytes", + ] +} + +fn threshold_u64(value: &serde_json::Value, field: &str) -> Result<u64, String> { + let value = value + .get(field) + .ok_or_else(|| format!("missing benchmark threshold field `{field}`"))?; + let Some(value) = value.as_u64() else { + return Err(format!( + "benchmark threshold field `{field}` must be an unsigned integer" + )); + }; + if value == 0 { + return Err(format!( + "benchmark threshold field `{field}` must be greater than zero" + )); + } + Ok(value) +} + #[derive(Debug, Clone, PartialEq)] pub struct BenchmarkRunReport { dataset: BenchDataset, dataset_profile: DatasetProfile, + profile: BenchmarkProfile, scenarios: Vec<ScenarioReport>, - thresholds: BenchmarkThresholds, validation_summary: BTreeMap<String, String>, } impl BenchmarkRunReport { - pub fn run(config: BenchDatasetConfig) -> Result<Self, String> { - let dataset = BenchDataset::generate(config)?; - let thresholds = BenchmarkThresholds::smoke(); + pub fn run(profile: BenchmarkProfile) -> Result<Self, String> { + let dataset = BenchDataset::generate(profile.dataset_config())?; + let thresholds = profile.thresholds(); let pocket_query = run_pocket_query_benchmark(&dataset)?; let read_gate = run_read_gate_benchmark(&dataset)?; let projection_rebuild = run_projection_rebuild_benchmark(&dataset)?; @@ -503,8 +733,8 @@ impl BenchmarkRunReport { Ok(Self { dataset, dataset_profile, + profile, scenarios, - thresholds, validation_summary, }) } @@ -517,6 +747,10 @@ impl BenchmarkRunReport { &self.dataset_profile } + pub fn profile(&self) -> &BenchmarkProfile { + &self.profile + } + pub fn scenarios(&self) -> &[ScenarioReport] { &self.scenarios } @@ -536,10 +770,20 @@ impl BenchmarkRunReport { "schema": 1, "run_id": run_id, "artifact_directory": artifact_directory.to_string_lossy(), + 
"profile": self.profile.name().as_str(), "dataset": self.dataset_profile.to_json(), "scenarios": self.scenarios.iter().map(ScenarioReport::to_json).collect::<Vec<_>>(), - "thresholds": self.thresholds.to_json(), + "threshold_source": self.profile.threshold_source(), + "thresholds": self.profile.thresholds().to_json(), "validation_summary": self.validation_summary, + "production_claim": { + "eligible": self.profile.production_claim_eligible(), + "profile_required": "production", + "target_hardware_evidence": self + .profile + .target_hardware_evidence() + .unwrap_or("absent") + }, "artifacts": { "summary_json": "summary.json", "dataset_events_jsonl": "dataset-events.jsonl" @@ -1272,10 +1516,11 @@ fn lower_hex(bytes: &[u8]) -> String { #[cfg(test)] mod tests { use super::{ - BenchDataset, BenchDatasetConfig, BenchGroupVisibility, BenchmarkRunReport, - BenchmarkThresholds, SCENARIO_BROADCAST_LAG, SCENARIO_GROUP_READ_GATE_OVERHEAD, - SCENARIO_MEMORY_PROFILE, SCENARIO_OUTBOX_REPLAY, SCENARIO_POCKET_QUERY_VISIBLE_EVENTS, - SCENARIO_PROJECTION_REBUILD, ScenarioReport, generated_state_counts, materialize_dataset, + BenchDataset, BenchDatasetConfig, BenchGroupVisibility, BenchmarkProfile, + BenchmarkProfileName, BenchmarkRunReport, BenchmarkThresholds, SCENARIO_BROADCAST_LAG, + SCENARIO_GROUP_READ_GATE_OVERHEAD, SCENARIO_MEMORY_PROFILE, SCENARIO_OUTBOX_REPLAY, + SCENARIO_POCKET_QUERY_VISIBLE_EVENTS, SCENARIO_PROJECTION_REBUILD, ScenarioReport, + generated_state_counts, materialize_dataset, }; use std::collections::BTreeSet; use tangle_groups::{GroupId, KIND_GROUP_ADMINS, KIND_GROUP_MEMBERS, KIND_GROUP_METADATA}; @@ -1361,8 +1606,8 @@ mod tests { #[test] fn benchmark_suite_runs_all_required_v2_scenarios() { - let report = - BenchmarkRunReport::run(BenchDatasetConfig::new(3, 1, 1, 2, 1)).expect("report"); + let report = BenchmarkRunReport::run(smoke_profile(BenchDatasetConfig::new(3, 1, 1, 2, 1))) + .expect("report"); for name in [ SCENARIO_POCKET_QUERY_VISIBLE_EVENTS, @@ 
-1388,6 +1633,119 @@ mod tests { } #[test] + fn benchmark_profiles_are_explicit_and_unknown_profiles_fail_closed() { + assert_eq!( + BenchmarkProfileName::all() + .iter() + .map(|profile| profile.as_str()) + .collect::<Vec<_>>(), + vec!["smoke", "medium", "production"] + ); + assert_eq!( + BenchmarkProfileName::parse("smoke") + .expect("smoke") + .as_str(), + "smoke" + ); + assert_eq!( + BenchmarkProfileName::parse("medium") + .expect("medium") + .as_str(), + "medium" + ); + assert_eq!( + BenchmarkProfileName::parse("production") + .expect("production") + .as_str(), + "production" + ); + assert!( + BenchmarkProfileName::parse("local") + .expect_err("unknown") + .contains("unknown benchmark profile") + ); + assert_eq!( + BenchmarkProfile::smoke().dataset_config(), + BenchDatasetConfig::smoke() + ); + assert_eq!( + BenchmarkProfile::medium().dataset_config(), + BenchDatasetConfig::medium() + ); + assert_eq!( + BenchmarkProfile::production().dataset_config(), + BenchDatasetConfig::production() + ); + } + + #[test] + fn benchmark_threshold_json_rejects_missing_unknown_or_zero_fields() { + let valid = BenchmarkThresholds::from_json_value(&BenchmarkThresholds::smoke().to_json()) + .expect("valid thresholds"); + assert_eq!(valid, BenchmarkThresholds::smoke()); + + let missing = serde_json::json!({ + "pocket_query_p95_micros": 1, + "read_gate_p95_micros": 1, + "projection_rebuild_elapsed_micros": 1, + "outbox_replay_elapsed_micros": 1, + "broadcast_lag_p95_micros": 1 + }); + assert!( + BenchmarkThresholds::from_json_value(&missing) + .expect_err("missing") + .contains("memory_profile_max_bytes") + ); + + let unknown = serde_json::json!({ + "pocket_query_p95_micros": 1, + "read_gate_p95_micros": 1, + "projection_rebuild_elapsed_micros": 1, + "outbox_replay_elapsed_micros": 1, + "broadcast_lag_p95_micros": 1, + "memory_profile_max_bytes": 1, + "extra": 1 + }); + assert!( + BenchmarkThresholds::from_json_value(&unknown) + .expect_err("unknown") + .contains("unknown benchmark 
threshold field") + ); + + let zero = serde_json::json!({ + "pocket_query_p95_micros": 0, + "read_gate_p95_micros": 1, + "projection_rebuild_elapsed_micros": 1, + "outbox_replay_elapsed_micros": 1, + "broadcast_lag_p95_micros": 1, + "memory_profile_max_bytes": 1 + }); + assert!( + BenchmarkThresholds::from_json_value(&zero) + .expect_err("zero") + .contains("greater than zero") + ); + } + + #[test] + fn production_claim_eligibility_requires_production_profile_and_hardware_evidence() { + assert!(!BenchmarkProfile::smoke().production_claim_eligible()); + assert!( + !BenchmarkProfile::smoke() + .with_target_hardware_evidence("target-hardware:ci") + .expect("evidence") + .production_claim_eligible() + ); + assert!(!BenchmarkProfile::production().production_claim_eligible()); + assert!( + BenchmarkProfile::production() + .with_target_hardware_evidence("target-hardware:prod-node-001") + .expect("evidence") + .production_claim_eligible() + ); + } + + #[test] fn benchmark_threshold_validation_rejects_missing_or_failed_scenarios() { let scenarios = vec![ passing_scenario(SCENARIO_POCKET_QUERY_VISIBLE_EVENTS), @@ -1419,12 +1777,19 @@ mod tests { #[test] fn benchmark_summary_json_matches_report_template_surface() { - let report = - BenchmarkRunReport::run(BenchDatasetConfig::new(3, 1, 1, 1, 1)).expect("report"); + let report = BenchmarkRunReport::run(smoke_profile(BenchDatasetConfig::new(3, 1, 1, 1, 1))) + .expect("report"); let summary = report.summary_json("unit-run", std::path::Path::new(".local/unit")); assert_eq!(summary["schema"], 1); assert_eq!(summary["run_id"], "unit-run"); + assert_eq!(summary["profile"], "smoke"); + assert_eq!(summary["threshold_source"], "builtin:smoke"); + assert_eq!(summary["production_claim"]["eligible"], false); + assert_eq!( + summary["production_claim"]["target_hardware_evidence"], + "absent" + ); assert_eq!( summary["dataset"]["fixture_family"], "synthetic repo-owned fixtures" @@ -1500,4 +1865,10 @@ mod tests { fn passing_scenario(name: 
&str) -> ScenarioReport { ScenarioReport::new(name, 1, 1, 0, 10, vec![1], 128) } + + fn smoke_profile(config: BenchDatasetConfig) -> BenchmarkProfile { + BenchmarkProfile::smoke() + .with_dataset_config(config) + .expect("smoke profile") + } }