Skip to content

Commit 16899dd

Browse files
committed
chore(soak): add re-armable pots-divergence + #767 wedge monitor
scripts/soak/pots-wedge-monitor.sh polls the from-genesis mainnet soak's dugite metrics, compares reserves/treasury against Koios mainnet /totals on each epoch advance, and emits ONE event line only on an actionable signal (POTS DIVERGENCE, #767 wedge with auto-sample, era transition, hourly heartbeat, or node stop). Used to validate #763 to Conway/ep524.
1 parent 1d1a80e commit 16899dd

1 file changed

Lines changed: 113 additions & 0 deletions

File tree

scripts/soak/pots-wedge-monitor.sh

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env bash
2+
# Soak monitor for the from-genesis mainnet #763 validation run.
3+
# Emits ONE event line per actionable signal only:
4+
# - POTS DIVERGENCE ep{N} (reserves/treasury Δ != 0 vs Koios mainnet /totals)
5+
# - WEDGE #767 (block did not advance for 2 consecutive 60s checks; auto-samples)
6+
# - ERA {name} (era boundary crossed)
7+
# - HEARTBEAT (once/hour: epoch, block, era, pots status, blk/s)
8+
# - SOAK STOPPED (process gone -> exit)
9+
# Everything else (per-epoch byte-exact OK) is tracked silently and folded into HEARTBEAT.
10+
# -u: unset variables are errors; pipefail: a pipeline fails if any stage fails.
# NOTE(review): -e is not enabled; presumably so a failed poll does not kill
# the monitor — confirm before adding it.
set -uo pipefail

# Run-specific settings. Each one keeps its previous hard-coded value as the
# default but can be overridden from the environment when re-arming the monitor
# for a new run, e.g.:  NODE_PID=1234 scripts/soak/pots-wedge-monitor.sh
METRICS=${METRICS:-http://127.0.0.1:12801/metrics}                # endpoint scraped by metric()
NODE_PID=${NODE_PID:-88299}                                       # pid probed with kill -0 and passed to sample
LOG=${LOG:-/private/tmp/dugite-mainnet-soak-20260615-154625.log}  # not referenced elsewhere in this script
REPORTS=${REPORTS:-/Users/michaelfazio/Source/dugite/reports}     # directory for wedge sample files
16+
17+
# era_for EPOCH
# Prints the mainnet era name for EPOCH on stdout.
# Each table entry is "<first epoch of the NEXT era>:<era name>"; the first
# entry whose bound is greater than EPOCH wins, and anything at or beyond the
# last bound is Conway.
era_for() {
  local epoch=$1
  local entry
  for entry in 208:Byron 236:Shelley 251:Allegra 290:Mary 365:Alonzo 507:Babbage; do
    if [ "$epoch" -lt "${entry%%:*}" ]; then
      echo "${entry#*:}"
      return
    fi
  done
  echo Conway
}
27+
28+
# metric NAME
# Fetches $METRICS and prints the value (second field) of the sample whose
# first field is exactly NAME. Prints nothing and returns non-zero when the
# endpoint is unreachable or NAME is absent.
#   - NAME is compared as a literal string, not spliced into a regex.
#   - Fields are split on any blank (space or tab).
#   - Only the first matching sample is printed, so callers always get a single
#     token even if the endpoint repeats a name.
metric() {
  curl -s --max-time 8 "$METRICS" 2>/dev/null \
    | awk -v name="$1" '
        $1 == name && !seen++ { print $2 }
        END { exit !seen }'
}
29+
30+
# koios_pots EPOCH
# Queries the Koios mainnet /totals endpoint for EPOCH and prints
# "<reserves> <treasury>" on one line (the ground truth the node is compared
# against). Each field is either a string of decimal digits or "?".
# "?" covers every case where no usable number is available: request failure,
# invalid JSON, a non-list or empty response, a missing key, or a null /
# non-numeric value. Callers can therefore rely on a single sentinel and never
# feed a token such as "None" into shell arithmetic.
koios_pots() {
  local epoch=$1
  curl -s --max-time 20 \
    "https://api.koios.rest/api/v1/totals?_epoch_no=${epoch}" \
    -H "accept: application/json" 2>/dev/null \
    | python3 -c '
import json
import sys


def lovelace(row, key):
    value = str(row.get(key, "?"))
    return value if value.isdigit() else "?"


try:
    data = json.load(sys.stdin)
except ValueError:
    data = None
usable = isinstance(data, list) and data and isinstance(data[0], dict)
row = data[0] if usable else {}
print(lovelace(row, "reserves"), lovelace(row, "treasury"))
' 2>/dev/null
}
34+
35+
# ---- loop state (carried from one poll to the next) ----
# "-1" marks a value that has not been observed yet.
last_epoch=-1 last_block=-1
block_hr_start=-1       # block number when the current heartbeat window began
last_era=""             # era name seen on the previous poll ("" = none yet)
stall=0                 # consecutive polls on which the block number did not move
hb_count=0              # polls since the last heartbeat; one heartbeat per 60 polls (~60 min)
pots_status="unknown"   # outcome of the most recent pots comparison
pots_epoch="?"          # epoch that pots_status refers to
43+
44+
# ---- main poll loop ----
# Polls the node's metrics about once a minute and prints an event line only
# for: node stop, wedge, era change, pots divergence / #763 close-out, and a
# heartbeat every 60 successful polls. Uses metric(), era_for(), koios_pots()
# and the state variables initialised earlier in this script.

# True when $1 is a plain non-negative decimal integer (no sign, no leading
# zeros, no float/exponent form), i.e. safe for `[ -gt ]` and $(( )).
_is_uint() { [[ $1 =~ ^(0|[1-9][0-9]*)$ ]]; }

hb_started=$SECONDS # bash SECONDS at the start of the current heartbeat window

while true; do
  ep=$(metric dugite_epoch_number)
  blk=$(metric dugite_block_number)

  # ---- liveness / stop detection ----
  # Anything that is not a clean integer (empty, multi-line, float) is treated
  # as "no reading": the integer tests below would otherwise error out.
  if ! _is_uint "$ep" || ! _is_uint "$blk"; then
    if ! kill -0 "$NODE_PID" 2>/dev/null; then
      echo "SOAK STOPPED: node pid $NODE_PID gone; last seen ep${last_epoch} block${last_block}"
      exit 0
    fi
    # metrics briefly unreachable but process alive -> skip this tick quietly
    sleep 60
    continue
  fi

  # First good reading opens the heartbeat window (block count and clock).
  if [ "$block_hr_start" = "-1" ]; then
    block_hr_start=$blk
    hb_started=$SECONDS
  fi

  # ---- wedge detection ----
  if [ "$blk" = "$last_block" ]; then
    stall=$((stall + 1))
    if [ "$stall" -ge 2 ]; then
      ts=$(date -u +%Y%m%dT%H%M%SZ)
      out="$REPORTS/767-wedge-sample-$ts.txt"
      # Sampler runs in the background so the poll loop is not held up.
      sample "$NODE_PID" 5 -mayDie -f "$out" >/dev/null 2>&1 &
      echo "WEDGE #767 at ep${ep} block${blk} slot$(metric dugite_slot_number): block did not advance across 2x60s; sampling -> $out"
      stall=0 # don't re-fire every tick; wait for next genuine 2-stall
    fi
  else
    stall=0
  fi

  # ---- era boundary ----
  era=$(era_for "$ep")
  if [ -n "$last_era" ] && [ "$era" != "$last_era" ]; then
    echo "ERA $era reached at ep${ep} block${blk} (was $last_era)"
  fi
  last_era=$era

  # ---- epoch advance -> pots byte-exactness check ----
  if [ "$ep" -gt "$last_epoch" ] && [ "$last_epoch" -ge 0 ]; then
    sleep 8 # let boundary pot update settle
    d_res=$(metric dugite_reserves_lovelace)
    d_tre=$(metric dugite_treasury_lovelace)
    # Re-read the epoch: if the node crossed another boundary since $ep was
    # sampled, the pots just read may belong to a later epoch and must not be
    # compared against Koios' totals for $ep.
    ep_now=$(metric dugite_epoch_number)
    if [ "$ep_now" = "$ep" ]; then
      read -r k_res k_tre <<<"$(koios_pots "$ep")"
      # Compare only when all four values are real integers; otherwise the
      # check is skipped silently and pots_status keeps its previous value.
      if _is_uint "$d_res" && _is_uint "$d_tre" \
        && _is_uint "$k_res" && _is_uint "$k_tre"; then
        pots_epoch=$ep
        if [ "$d_res" = "$k_res" ] && [ "$d_tre" = "$k_tre" ]; then
          pots_status="byte-exact"
        else
          pots_status="DIVERGED"
          dr=$((d_res - k_res))
          dt=$((d_tre - k_tre))
          echo "POTS DIVERGENCE ep${ep}: reserves dugite=$d_res koios=$k_res Δ=$dr | treasury dugite=$d_tre koios=$k_tre Δ=$dt"
        fi
      fi
    fi
    # Conway close-out signal for #763. Requires that THIS epoch was actually
    # verified, not merely that an earlier epoch left the status byte-exact.
    if [ "$ep" -ge 524 ] && [ "$pots_status" = "byte-exact" ] \
      && [ "$pots_epoch" = "$ep" ]; then
      echo "POTS #763 byte-exact at ep${ep} (>=524, Conway): reserves=$d_res treasury=$d_tre match Koios"
    fi
  fi
  last_epoch=$ep
  last_block=$blk

  # ---- hourly heartbeat ----
  hb_count=$((hb_count + 1))
  if [ "$hb_count" -ge 60 ]; then
    # Divide by the time that really elapsed: a tick is 60 s of sleep plus
    # curl time and the 8 s settle, so 60 ticks is longer than 3600 s.
    elapsed=$((SECONDS - hb_started))
    [ "$elapsed" -gt 0 ] || elapsed=1
    bps=$(((blk - block_hr_start) / elapsed))
    echo "HEARTBEAT ep${ep} $era block${blk} (~${bps} blk/s last hr) pots=${pots_status}@ep${pots_epoch}"
    hb_count=0
    block_hr_start=$blk
    hb_started=$SECONDS
  fi

  sleep 60
done

0 commit comments

Comments
 (0)