-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathentropy.py
More file actions
251 lines (206 loc) · 9.18 KB
/
entropy.py
File metadata and controls
251 lines (206 loc) · 9.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/usr/bin/env python3
"""Entropy: LLM Documentation-Code Drift Experiment.
Orchestrates a loop where an LLM writes a spec, another builds code from it,
a third re-documents the code, and a judge scores intent preservation.
Repeat N times and watch semantic drift accumulate.
"""
import argparse
import json
import os
import re
import shutil
import sys
import time
import anthropic
import git_ops
import metrics
import prompts
# Cap on total characters of workspace code packed into a single LLM prompt;
# read_workspace_code() truncates once this budget is exceeded.
MAX_CONTENT_CHARS = 150_000
# Default Anthropic model used for every role (builder, re-documenter, judge).
DEFAULT_MODEL = "claude-sonnet-4-6"
def call_llm(client, model: str, system: str, user: str, verbose: bool = False) -> tuple[str, int]:
    """Send a single-turn prompt to the Anthropic API.

    Returns a (response_text, output_tokens) pair.  `client` is an
    anthropic.Anthropic instance; `system` and `user` are the system
    prompt and user message for a one-shot request.
    """
    if verbose:
        print(f" [LLM] sending {len(user)} chars...")
    response = client.messages.create(
        model=model,
        max_tokens=8192,
        system=system,
        messages=[{"role": "user", "content": user}],
    )
    # Single-turn text request: the reply's first content block holds the text.
    reply_text = response.content[0].text
    token_count = response.usage.output_tokens
    if verbose:
        print(f" [LLM] got {token_count} tokens back")
    return reply_text, token_count
def parse_files(response: str) -> dict[str, str]:
    """Parse builder output into {filename: content} using --- FILE: name --- markers.

    Returns an empty dict when no marker is found, so the caller can retry.
    Filenames that would escape the workspace (absolute paths, or any `..`
    path component) are silently skipped.
    """
    pattern = re.compile(r'^---\s*FILE:\s*(.+?)\s*---\s*$', re.MULTILINE)
    splits = pattern.split(response)
    # With at least one marker, split yields [preamble, name1, content1, ...];
    # fewer than 3 elements means no marker matched at all.
    if len(splits) < 3:
        return {}
    files = {}
    for i in range(1, len(splits), 2):
        name = splits[i].strip()
        content = splits[i + 1] if i + 1 < len(splits) else ""
        # Security: reject traversal component-wise (a bare `".." in name`
        # would also reject legitimate names like "a..b.txt"), and use
        # os.path.isabs so Windows drive/root forms are caught too.
        components = name.replace("\\", "/").split("/")
        if not name or os.path.isabs(name) or ".." in components:
            continue
        files[name] = content.strip() + "\n"
    return files
def read_workspace_code(workspace: str) -> str:
    """Read all code files from workspace, excluding SPEC.md and .git/.

    Files are concatenated with `--- FILE: relpath ---` headers.  Both
    directories and filenames are sorted so traversal order — and therefore
    the prompt text fed to the LLM — is deterministic across runs and
    platforms (os.walk's directory order is otherwise filesystem-dependent).
    Output is capped at MAX_CONTENT_CHARS: on overflow a truncation note is
    appended and reading stops.
    """
    parts = []
    total = 0
    for root, dirs, files in os.walk(workspace):
        # Mutate dirs in place: prunes .git from the walk and fixes the
        # descent order deterministically.
        dirs[:] = sorted(d for d in dirs if d != ".git")
        for fname in sorted(files):
            if fname == "SPEC.md":
                continue
            fpath = os.path.join(root, fname)
            relpath = os.path.relpath(fpath, workspace)
            try:
                # Explicit encoding; errors="replace" tolerates stray binary files.
                with open(fpath, "r", encoding="utf-8", errors="replace") as f:
                    content = f.read()
            except OSError:
                continue
            chunk = f"--- FILE: {relpath} ---\n{content}\n"
            if total + len(chunk) > MAX_CONTENT_CHARS:
                parts.append(f"\n[TRUNCATED: workspace exceeded {MAX_CONTENT_CHARS} chars]\n")
                return "\n".join(parts)
            parts.append(chunk)
            total += len(chunk)
    return "\n".join(parts)
def write_files(workspace: str, files: dict[str, str]) -> None:
    """Write parsed files to the workspace directory, creating subdirs as needed.

    `files` maps workspace-relative names (already traversal-checked by
    parse_files) to file contents.
    """
    for name, content in files.items():
        fpath = os.path.join(workspace, name)
        # dirname(fpath) is never empty: fpath always carries the workspace
        # prefix, so this creates either the workspace itself or a nested subdir.
        os.makedirs(os.path.dirname(fpath), exist_ok=True)
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(content)
def parse_judge_response(text: str) -> dict:
    """Extract the judge's structured fields from its free-text response.

    Any field the judge omitted keeps a neutral default; the intent score
    is clamped into the 0-10 range.
    """
    result = {"intent_score": 5, "feature_drift": "unparseable", "specificity_shift": "unchanged"}
    score_match = re.search(r'INTENT_SCORE:\s*(\d+)', text)
    if score_match is not None:
        result["intent_score"] = max(0, min(10, int(score_match.group(1))))
    drift_match = re.search(r'FEATURE_DRIFT:\s*(.+)', text)
    if drift_match is not None:
        result["feature_drift"] = drift_match.group(1).strip()
    shift_match = re.search(r'SPECIFICITY_SHIFT:\s*(\S+)', text)
    if shift_match is not None:
        result["specificity_shift"] = shift_match.group(1).strip()
    return result
def main():
    """Run the drift experiment: seed a spec, then loop build -> re-document -> judge.

    Each iteration: (1) the builder LLM writes code from the current SPEC.md,
    (2) the re-documenter LLM rewrites SPEC.md from the code alone, (3) a judge
    scores the new spec against the original. Per-iteration metrics are
    accumulated and written to <output>/metrics.json.
    """
    parser = argparse.ArgumentParser(description="Entropy: LLM Documentation-Code Drift Experiment")
    parser.add_argument("--iterations", type=int, default=10, help="Number of build/re-document cycles")
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Anthropic model to use")
    parser.add_argument("--workspace", default="workspace", help="Workspace directory")
    parser.add_argument("--output", default="output", help="Output directory for metrics")
    parser.add_argument("--clean", action="store_true", help="Remove existing workspace before starting")
    parser.add_argument("--verbose", action="store_true", help="Print detailed progress")
    args = parser.parse_args()

    workspace = os.path.abspath(args.workspace)
    output_dir = os.path.abspath(args.output)
    spec_path = os.path.join(workspace, "SPEC.md")

    # Refuse to clobber an existing workspace unless --clean was given.
    if os.path.exists(workspace):
        if not args.clean:
            print(f"Error: {workspace} already exists. Use --clean to remove it.", file=sys.stderr)
            sys.exit(1)
        shutil.rmtree(workspace)
    os.makedirs(workspace)
    os.makedirs(output_dir, exist_ok=True)

    client = anthropic.Anthropic()
    all_metrics = []

    # --- SEED PHASE: have the LLM write the initial spec from the seed prompt ---
    print("=== Seed: generating initial spec ===")
    seed_user = prompts.INITIAL_DOCUMENTER_USER.format(prompt=prompts.SEED_PROMPT)
    spec_text, _ = call_llm(
        client, args.model, prompts.INITIAL_DOCUMENTER_SYSTEM, seed_user, args.verbose
    )
    with open(spec_path, "w", encoding="utf-8") as f:
        f.write(spec_text)
    # Save the original spec separately: the judge compares every later spec to it.
    original_spec_path = os.path.join(output_dir, "original_spec.md")
    with open(original_spec_path, "w", encoding="utf-8") as f:
        f.write(spec_text)
    git_ops.init_repo(workspace)
    git_ops.commit_all(workspace, "seed: initial spec")
    print(f" Seed spec: {len(spec_text.split())} words")

    # --- ITERATION LOOP ---
    for i in range(1, args.iterations + 1):
        print(f"\n=== Iteration {i}/{args.iterations} ===")
        iter_metrics = {"iteration": i}

        # BUILD: builder LLM turns the current spec (plus any existing code) into code.
        print(" Building code from spec...")
        with open(spec_path, encoding="utf-8") as f:
            spec_text = f.read()
        existing_code = read_workspace_code(workspace)
        if existing_code.strip():
            code_section = prompts.BUILDER_USER_EXISTING_CODE.format(code=existing_code)
        else:
            code_section = prompts.BUILDER_USER_NO_CODE
        builder_user = prompts.BUILDER_USER.format(spec=spec_text, existing_code_section=code_section)
        builder_response, builder_tokens = call_llm(
            client, args.model, prompts.BUILDER_SYSTEM, builder_user, args.verbose
        )
        iter_metrics["builder_response_tokens"] = builder_tokens
        files = parse_files(builder_response)
        if not files:
            # Retry once: the model occasionally omits the FILE markers.
            if args.verbose:
                print(" [WARN] No --- FILE: markers found, retrying...")
            builder_response, retry_tokens = call_llm(
                client, args.model, prompts.BUILDER_SYSTEM, builder_user, args.verbose
            )
            iter_metrics["builder_response_tokens"] += retry_tokens
            files = parse_files(builder_response)
            if not files:
                # Last-resort fallback: keep the run going with the raw response.
                if args.verbose:
                    print(" [WARN] Retry failed, treating response as index.html")
                files = {"index.html": builder_response}
        write_files(workspace, files)
        git_ops.commit_all(workspace, f"iteration {i}: build")
        print(f" Built {len(files)} file(s)")

        # RE-DOCUMENT: documenter LLM rewrites the spec from the code alone.
        print(" Re-documenting from code...")
        code_text = read_workspace_code(workspace)
        redoc_user = prompts.REDOCUMENTER_USER.format(code=code_text)
        new_spec, doc_tokens = call_llm(
            client, args.model, prompts.REDOCUMENTER_SYSTEM, redoc_user, args.verbose
        )
        iter_metrics["documenter_response_tokens"] = doc_tokens
        with open(spec_path, "w", encoding="utf-8") as f:
            f.write(new_spec)
        git_ops.commit_all(workspace, f"iteration {i}: re-document")
        print(f" New spec: {len(new_spec.split())} words")

        # JUDGE: score the new spec against the original for intent preservation.
        print(" Judging intent preservation...")
        with open(original_spec_path, encoding="utf-8") as f:
            original_spec = f.read()
        judge_user = prompts.JUDGE_USER.format(original_spec=original_spec, current_spec=new_spec)
        judge_response, _ = call_llm(
            client, args.model, prompts.JUDGE_SYSTEM, judge_user, args.verbose
        )
        judge_scores = parse_judge_response(judge_response)
        iter_metrics.update(judge_scores)
        print(f" Intent score: {judge_scores['intent_score']}/10 — {judge_scores['feature_drift']}")

        # MEASURE: static code/doc metrics for this iteration.
        code_metrics = metrics.collect_metrics(workspace, spec_path)
        iter_metrics.update(code_metrics)
        all_metrics.append(iter_metrics)

    # --- FINALIZE: persist metrics and print a summary table ---
    metrics_path = os.path.join(output_dir, "metrics.json")
    with open(metrics_path, "w", encoding="utf-8") as f:
        json.dump(all_metrics, f, indent=2)
    print(f"\n=== Done: {args.iterations} iterations ===")
    print(f"Metrics written to {metrics_path}")
    print(f"Workspace at {workspace} (inspect with: cd {args.workspace} && git log --stat)")
    print(f"\n{'Iter':>4} {'LOC':>5} {'Files':>5} {'Words':>5} {'Intent':>6} {'Drift'}")
    print("-" * 70)
    for m in all_metrics:
        print(f"{m['iteration']:>4} {m['total_lines_of_code']:>5} {m['file_count']:>5} "
              f"{m['doc_word_count']:>5} {m['intent_score']:>5}/10 {m.get('feature_drift', '')[:40]}")
# Script entry point: run the experiment only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()