Fix: handle both v2 (per_seed) and v3 (seeds) output formats
Browse files- jobs/analyze_collapse.py +34 -16
jobs/analyze_collapse.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
OCC Collapse Mechanism — Post-Run Analysis Harness v2
|
| 4 |
=======================================================
|
| 5 |
-
Handles both v2 (summary-only) and v3 (per-topic traces) output formats.
|
| 6 |
|
| 7 |
Usage:
|
| 8 |
python analyze_collapse.py <results.json>
|
|
@@ -10,6 +10,7 @@ Usage:
|
|
| 10 |
|
| 11 |
import json, csv, sys
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
|
| 14 |
HYPOTHESIS_RULES = {
|
| 15 |
"H1_volume_amplification": {
|
|
@@ -52,15 +53,32 @@ HYPOTHESIS_RULES = {
|
|
| 52 |
def load(path):
    """Read a results file and return its parsed JSON content.

    Args:
        path: Filesystem path of the JSON results file.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin the encoding so parsing does not depend on the platform's
    # locale default (JSON interchange is UTF-8).
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def compute_summary(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
s = data.get("summary", {})
|
| 57 |
-
if
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
return s
|
| 65 |
|
| 66 |
def make_summary_rows(summary):
|
|
@@ -70,8 +88,8 @@ def make_summary_rows(summary):
|
|
| 70 |
|
| 71 |
def make_retention_rows(data):
|
| 72 |
rows = []
|
| 73 |
-
for sid, sd in
|
| 74 |
-
t = sd.get("equal_3round_traced", {})
|
| 75 |
if not t or t.get("accuracy") is None: continue
|
| 76 |
for rd, lbl in [(2,"round2"),(3,"round3")]:
|
| 77 |
rows.append({"seed": int(sid), "round": rd,
|
|
@@ -83,7 +101,7 @@ def make_retention_rows(data):
|
|
| 83 |
|
| 84 |
def make_per_topic_rows(data):
|
| 85 |
rows = []
|
| 86 |
-
for sid, sd in
|
| 87 |
pt = sd.get("equal_3round_traced",{}).get("per_topic_rounds", [])
|
| 88 |
for p in pt:
|
| 89 |
rounds = p.get("rounds",[])
|
|
@@ -98,7 +116,7 @@ def make_per_topic_rows(data):
|
|
| 98 |
|
| 99 |
def make_flip_rows(data):
|
| 100 |
flips, advs = [], []
|
| 101 |
-
for sid, sd in
|
| 102 |
advs.append({"seed": int(sid),
|
| 103 |
"total_adversary_flips": sd.get("equal_3round_traced",{}).get("adversary_flips",0)})
|
| 104 |
for p in sd.get("equal_3round_traced",{}).get("per_topic_rounds",[]):
|
|
@@ -113,9 +131,9 @@ def make_flip_rows(data):
|
|
| 113 |
|
| 114 |
def evaluate(summary_rows, data):
|
| 115 |
sm = {r["condition"]: r for r in summary_rows}
|
| 116 |
-
# Get retention from first seed
|
| 117 |
-
fs = list(
|
| 118 |
-
t = fs.get("equal_3round_traced",{})
|
| 119 |
rr = t.get("honest_retention_round3",0)
|
| 120 |
fr = t.get("flipped_away_round3",0)
|
| 121 |
rate = rr/(rr+fr) if (rr+fr)>0 else 1.0
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
OCC Collapse Mechanism — Post-Run Analysis Harness v2.1
|
| 4 |
=======================================================
|
| 5 |
+
Handles both v2 (summary-only, per_seed key) and v3 (per-topic traces, seeds key) output formats.
|
| 6 |
|
| 7 |
Usage:
|
| 8 |
python analyze_collapse.py <results.json>
|
|
|
|
| 10 |
|
| 11 |
import json, csv, sys
|
| 12 |
from pathlib import Path
|
| 13 |
+
from collections import defaultdict
|
| 14 |
|
| 15 |
HYPOTHESIS_RULES = {
|
| 16 |
"H1_volume_amplification": {
|
|
|
|
| 53 |
def load(path):
    """Read a results file and return its parsed JSON content.

    Args:
        path: Filesystem path of the JSON results file.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin the encoding so parsing does not depend on the platform's
    # locale default (JSON interchange is UTF-8).
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
| 55 |
|
| 56 |
+
def _get_seeds(data):
|
| 57 |
+
"""Unify per_seed (v2) and seeds (v3) key."""
|
| 58 |
+
return data.get("seeds", data.get("per_seed", {}))
|
| 59 |
+
|
| 60 |
def compute_summary(data):
    """Build the per-condition accuracy summary for a results document.

    Sources are tried in order and the first one that yields data wins:

    1. a pre-computed ``aggregate_seeds_*`` block (v2 format),
    2. an explicit ``summary`` block (v3 format),
    3. the per-seed results, aggregated here.

    Args:
        data: Parsed results document (a dict).

    Returns:
        For sources 1 and 3, a dict mapping condition name to
        ``{"mean": ..., "min": ..., "max": ...}``. For source 2, the
        ``summary`` value is returned unchanged. An empty dict is returned
        when no source yields data.
    """
    # Try pre-computed aggregate first (v2 format)
    agg_key = [k for k in data if k.startswith("aggregate_seeds_")]
    if agg_key:
        raw = data[agg_key[0]]
        s = {}
        # A malformed (non-dict) aggregate falls through to the next source
        # instead of raising AttributeError on .items().
        if isinstance(raw, dict):
            for cn, ci in raw.items():
                # Skip null accuracies, matching the per-seed path below.
                if isinstance(ci, dict) and ci.get("mean_accuracy") is not None:
                    mean = ci["mean_accuracy"]
                    # Only the mean is read from the aggregate, so it
                    # stands in for min and max as well.
                    s[cn] = {"mean": mean, "min": mean, "max": mean}
        if s:
            return s

    # Try explicit summary (v3 format)
    s = data.get("summary", {})
    if s:
        return s

    # Compute from seeds
    acc = defaultdict(list)
    for sd in _get_seeds(data).values():
        if not isinstance(sd, dict):
            continue  # ignore malformed seed entries
        for cn, ci in sd.items():
            if isinstance(ci, dict) and ci.get("accuracy") is not None:
                acc[cn].append(ci["accuracy"])
    return {
        k: {"mean": sum(v) / len(v), "min": min(v), "max": max(v)}
        for k, v in acc.items()
    }
|
| 83 |
|
| 84 |
def make_summary_rows(summary):
|
|
|
|
| 88 |
|
| 89 |
def make_retention_rows(data):
|
| 90 |
rows = []
|
| 91 |
+
for sid, sd in _get_seeds(data).items():
|
| 92 |
+
t = sd.get("equal_3round_traced", sd.get("equal_3round", {}))
|
| 93 |
if not t or t.get("accuracy") is None: continue
|
| 94 |
for rd, lbl in [(2,"round2"),(3,"round3")]:
|
| 95 |
rows.append({"seed": int(sid), "round": rd,
|
|
|
|
| 101 |
|
| 102 |
def make_per_topic_rows(data):
|
| 103 |
rows = []
|
| 104 |
+
for sid, sd in _get_seeds(data).items():
|
| 105 |
pt = sd.get("equal_3round_traced",{}).get("per_topic_rounds", [])
|
| 106 |
for p in pt:
|
| 107 |
rounds = p.get("rounds",[])
|
|
|
|
| 116 |
|
| 117 |
def make_flip_rows(data):
|
| 118 |
flips, advs = [], []
|
| 119 |
+
for sid, sd in _get_seeds(data).items():
|
| 120 |
advs.append({"seed": int(sid),
|
| 121 |
"total_adversary_flips": sd.get("equal_3round_traced",{}).get("adversary_flips",0)})
|
| 122 |
for p in sd.get("equal_3round_traced",{}).get("per_topic_rounds",[]):
|
|
|
|
| 131 |
|
| 132 |
def evaluate(summary_rows, data):
|
| 133 |
sm = {r["condition"]: r for r in summary_rows}
|
| 134 |
+
# Get retention from first seed (v3 only — v2 has no per-topic data)
|
| 135 |
+
fs = list(_get_seeds(data).values())[0] if _get_seeds(data) else {}
|
| 136 |
+
t = fs.get("equal_3round_traced", fs.get("equal_3round", {}))
|
| 137 |
rr = t.get("honest_retention_round3",0)
|
| 138 |
fr = t.get("flipped_away_round3",0)
|
| 139 |
rate = rr/(rr+fr) if (rr+fr)>0 else 1.0
|