narcolepticchicken committed on
Commit
a3dbabc
·
verified ·
1 Parent(s): fe81924

Fix: handle both v2 (per_seed) and v3 (seeds) output formats

Browse files
Files changed (1) hide show
  1. jobs/analyze_collapse.py +34 -16
jobs/analyze_collapse.py CHANGED
@@ -1,8 +1,8 @@
1
  #!/usr/bin/env python3
2
  """
3
- OCC Collapse Mechanism — Post-Run Analysis Harness v2
4
  =======================================================
5
- Handles both v2 (summary-only) and v3 (per-topic traces) output formats.
6
 
7
  Usage:
8
  python analyze_collapse.py <results.json>
@@ -10,6 +10,7 @@ Usage:
10
 
11
  import json, csv, sys
12
  from pathlib import Path
 
13
 
14
  HYPOTHESIS_RULES = {
15
  "H1_volume_amplification": {
@@ -52,15 +53,32 @@ HYPOTHESIS_RULES = {
52
  def load(path):
53
  with open(path) as f: return json.load(f)
54
 
 
 
 
 
55
  def compute_summary(data):
 
 
 
 
 
 
 
 
 
 
 
56
  s = data.get("summary", {})
57
- if not s:
58
- from collections import defaultdict
59
- acc = defaultdict(list)
60
- for sd in data.get("seeds",{}).values():
61
- for cn, ci in sd.items():
62
- if ci.get("accuracy") is not None: acc[cn].append(ci["accuracy"])
63
- s = {k: {"mean": sum(v)/len(v), "min": min(v), "max": max(v)} for k,v in acc.items()}
 
 
64
  return s
65
 
66
  def make_summary_rows(summary):
@@ -70,8 +88,8 @@ def make_summary_rows(summary):
70
 
71
  def make_retention_rows(data):
72
  rows = []
73
- for sid, sd in data.get("seeds",{}).items():
74
- t = sd.get("equal_3round_traced", {})
75
  if not t or t.get("accuracy") is None: continue
76
  for rd, lbl in [(2,"round2"),(3,"round3")]:
77
  rows.append({"seed": int(sid), "round": rd,
@@ -83,7 +101,7 @@ def make_retention_rows(data):
83
 
84
  def make_per_topic_rows(data):
85
  rows = []
86
- for sid, sd in data.get("seeds",{}).items():
87
  pt = sd.get("equal_3round_traced",{}).get("per_topic_rounds", [])
88
  for p in pt:
89
  rounds = p.get("rounds",[])
@@ -98,7 +116,7 @@ def make_per_topic_rows(data):
98
 
99
  def make_flip_rows(data):
100
  flips, advs = [], []
101
- for sid, sd in data.get("seeds",{}).items():
102
  advs.append({"seed": int(sid),
103
  "total_adversary_flips": sd.get("equal_3round_traced",{}).get("adversary_flips",0)})
104
  for p in sd.get("equal_3round_traced",{}).get("per_topic_rounds",[]):
@@ -113,9 +131,9 @@ def make_flip_rows(data):
113
 
114
  def evaluate(summary_rows, data):
115
  sm = {r["condition"]: r for r in summary_rows}
116
- # Get retention from first seed
117
- fs = list(data.get("seeds",{}).values())[0] if data.get("seeds") else {}
118
- t = fs.get("equal_3round_traced",{})
119
  rr = t.get("honest_retention_round3",0)
120
  fr = t.get("flipped_away_round3",0)
121
  rate = rr/(rr+fr) if (rr+fr)>0 else 1.0
 
1
  #!/usr/bin/env python3
2
  """
3
+ OCC Collapse Mechanism — Post-Run Analysis Harness v2.1
4
  =======================================================
5
+ Handles both v2 (summary-only, per_seed key) and v3 (per-topic traces, seeds key) output formats.
6
 
7
  Usage:
8
  python analyze_collapse.py <results.json>
 
10
 
11
  import json, csv, sys
12
  from pathlib import Path
13
+ from collections import defaultdict
14
 
15
  HYPOTHESIS_RULES = {
16
  "H1_volume_amplification": {
 
53
  def load(path):
54
  with open(path) as f: return json.load(f)
55
 
56
+ def _get_seeds(data):
57
+ """Unify per_seed (v2) and seeds (v3) key."""
58
+ return data.get("seeds", data.get("per_seed", {}))
59
+
60
  def compute_summary(data):
61
+ # Try pre-computed aggregate first (v2 format)
62
+ agg_key = [k for k in data if k.startswith("aggregate_seeds_")]
63
+ if agg_key:
64
+ raw = data[agg_key[0]]
65
+ s = {}
66
+ for cn, ci in raw.items():
67
+ if isinstance(ci, dict) and "mean_accuracy" in ci:
68
+ s[cn] = {"mean": ci["mean_accuracy"], "min": ci["mean_accuracy"], "max": ci["mean_accuracy"]}
69
+ if s:
70
+ return s
71
+ # Try explicit summary (v3 format)
72
  s = data.get("summary", {})
73
+ if s:
74
+ return s
75
+ # Compute from seeds
76
+ acc = defaultdict(list)
77
+ for sd in _get_seeds(data).values():
78
+ for cn, ci in sd.items():
79
+ if isinstance(ci, dict) and ci.get("accuracy") is not None:
80
+ acc[cn].append(ci["accuracy"])
81
+ s = {k: {"mean": sum(v)/len(v), "min": min(v), "max": max(v)} for k,v in acc.items()}
82
  return s
83
 
84
  def make_summary_rows(summary):
 
88
 
89
  def make_retention_rows(data):
90
  rows = []
91
+ for sid, sd in _get_seeds(data).items():
92
+ t = sd.get("equal_3round_traced", sd.get("equal_3round", {}))
93
  if not t or t.get("accuracy") is None: continue
94
  for rd, lbl in [(2,"round2"),(3,"round3")]:
95
  rows.append({"seed": int(sid), "round": rd,
 
101
 
102
  def make_per_topic_rows(data):
103
  rows = []
104
+ for sid, sd in _get_seeds(data).items():
105
  pt = sd.get("equal_3round_traced",{}).get("per_topic_rounds", [])
106
  for p in pt:
107
  rounds = p.get("rounds",[])
 
116
 
117
  def make_flip_rows(data):
118
  flips, advs = [], []
119
+ for sid, sd in _get_seeds(data).items():
120
  advs.append({"seed": int(sid),
121
  "total_adversary_flips": sd.get("equal_3round_traced",{}).get("adversary_flips",0)})
122
  for p in sd.get("equal_3round_traced",{}).get("per_topic_rounds",[]):
 
131
 
132
  def evaluate(summary_rows, data):
133
  sm = {r["condition"]: r for r in summary_rows}
134
+ # Get retention from first seed (v3 only — v2 has no per-topic data)
135
+ fs = list(_get_seeds(data).values())[0] if _get_seeds(data) else {}
136
+ t = fs.get("equal_3round_traced", fs.get("equal_3round", {}))
137
  rr = t.get("honest_retention_round3",0)
138
  fr = t.get("flipped_away_round3",0)
139
  rate = rr/(rr+fr) if (rr+fr)>0 else 1.0