Skip to content

Commit 47d7c4a

Browse files
authored
feat: Filter kwargs from new token events (#2714)
1 parent 3c57445 commit 47d7c4a

File tree

6 files changed

+236
-7
lines changed

6 files changed

+236
-7
lines changed

js/src/client.ts

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1066,6 +1066,27 @@ export class Client implements LangSmithTracingClientInterface {
10661066
return outputs;
10671067
}
10681068

1069+
/**
1070+
* Filter content from new_token events to prevent streaming LLM output
1071+
* from being uploaded via events.
*
* Events whose `name` is exactly "new_token" are replaced by shallow copies
* that omit the `kwargs` property; every other event is passed through as
* the same object reference. Neither the input array nor its event objects
* are mutated.
*
* @param events - Run events to filter; may be undefined or empty.
* @returns The input itself when it is undefined or empty; otherwise a new
* array with one entry per input event, in the same order.
1072+
*/
1073+
private _filterNewTokenEvents(
1074+
events: KVMap[] | undefined
1075+
): KVMap[] | undefined {
1076+
// Nothing to filter: hand back undefined / the empty array untouched.
if (!events || events.length === 0) {
1077+
return events;
1078+
}
1079+
return events.map((event) => {
1080+
if (event.name === "new_token") {
1081+
// Remove the kwargs containing the token data. Rest-destructuring builds
// a copy without `kwargs`, so the caller's event object stays intact.
1082+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
1083+
const { kwargs: _, ...rest } = event;
1084+
return rest;
1085+
}
1086+
// Any other event type is forwarded unchanged (same reference).
return event;
1087+
});
1088+
}
1089+
10691090
private async prepareRunCreateOrUpdateInputs(
10701091
run: RunUpdate
10711092
): Promise<RunUpdate>;
@@ -1082,6 +1103,9 @@ export class Client implements LangSmithTracingClientInterface {
10821103
if (runParams.outputs !== undefined) {
10831104
runParams.outputs = await this.processOutputs(runParams.outputs);
10841105
}
1106+
if (runParams.events !== undefined) {
1107+
runParams.events = this._filterNewTokenEvents(runParams.events);
1108+
}
10851109
return runParams;
10861110
}
10871111

@@ -2189,6 +2213,10 @@ export class Client implements LangSmithTracingClientInterface {
21892213
if (run.outputs) {
21902214
run.outputs = await this.processOutputs(run.outputs);
21912215
}
2216+
2217+
if (run.events) {
2218+
run.events = this._filterNewTokenEvents(run.events);
2219+
}
21922220
// TODO: Untangle types
21932221
const data: UpdateRunParams = { ...run, id: runId };
21942222
if (!this._filterForSampling([data], true).length) {

js/src/tests/client.test.ts

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1029,4 +1029,98 @@ describe("Client", () => {
10291029
expect(mergedHeaders["x-api-key"]).toBe("test-api-key");
10301030
});
10311031
});
1032+
1033+
// Unit tests for Client._filterNewTokenEvents. The method is private, so each
// test reaches it through an `as any` cast on the client instance.
describe("_filterNewTokenEvents", () => {
1034+
// Mixed input: only the new_token event loses `kwargs`.
it("should strip kwargs from new_token events", () => {
1035+
const client = new Client({ apiKey: "test-api-key" });
1036+
const events = [
1037+
{
1038+
name: "new_token",
1039+
kwargs: { token: "sensitive streaming data" },
1040+
time: "2024-01-01T00:00:00Z",
1041+
},
1042+
{
1043+
name: "other_event",
1044+
kwargs: { data: "keep this" },
1045+
time: "2024-01-01T00:00:01Z",
1046+
},
1047+
];
1048+
1049+
const filtered = (client as any)._filterNewTokenEvents(events);
1050+
1051+
expect(filtered[0].name).toBe("new_token");
1052+
expect(filtered[0].time).toBe("2024-01-01T00:00:00Z");
1053+
expect(filtered[0].kwargs).toBeUndefined();
1054+
// The non-new_token event keeps its kwargs.
expect(filtered[1].kwargs).toEqual({ data: "keep this" });
1055+
});
1056+
1057+
// Boundary case: an empty array comes back as an empty array.
it("should handle empty events array", () => {
1058+
const client = new Client({ apiKey: "test-api-key" });
1059+
const filtered = (client as any)._filterNewTokenEvents([]);
1060+
expect(filtered).toEqual([]);
1061+
});
1062+
1063+
// Boundary case: undefined is returned as undefined.
it("should handle undefined events", () => {
1064+
const client = new Client({ apiKey: "test-api-key" });
1065+
const filtered = (client as any)._filterNewTokenEvents(undefined);
1066+
expect(filtered).toBeUndefined();
1067+
});
1068+
1069+
// Events that never had `kwargs` must come out structurally unchanged.
it("should handle events without kwargs", () => {
1070+
const client = new Client({ apiKey: "test-api-key" });
1071+
const events = [
1072+
{ name: "new_token", time: "2024-01-01T00:00:00Z" },
1073+
{ name: "other_event", time: "2024-01-01T00:00:01Z" },
1074+
];
1075+
1076+
const filtered = (client as any)._filterNewTokenEvents(events);
1077+
1078+
expect(filtered[0]).toEqual({
1079+
name: "new_token",
1080+
time: "2024-01-01T00:00:00Z",
1081+
});
1082+
expect(filtered[1]).toEqual({
1083+
name: "other_event",
1084+
time: "2024-01-01T00:00:01Z",
1085+
});
1086+
});
1087+
1088+
// Only `kwargs` is removed; every other property on a new_token event survives.
it("should preserve other event properties", () => {
1089+
const client = new Client({ apiKey: "test-api-key" });
1090+
const events = [
1091+
{
1092+
name: "new_token",
1093+
kwargs: { token: "data" },
1094+
time: "2024-01-01T00:00:00Z",
1095+
message: "token received",
1096+
custom_field: "custom_value",
1097+
},
1098+
];
1099+
1100+
const filtered = (client as any)._filterNewTokenEvents(events);
1101+
1102+
expect(filtered[0].name).toBe("new_token");
1103+
expect(filtered[0].time).toBe("2024-01-01T00:00:00Z");
1104+
expect(filtered[0].message).toBe("token received");
1105+
expect(filtered[0].custom_field).toBe("custom_value");
1106+
expect(filtered[0].kwargs).toBeUndefined();
1107+
});
1108+
1109+
// Every new_token event in the array is filtered, and none are dropped.
it("should filter multiple new_token events", () => {
1110+
const client = new Client({ apiKey: "test-api-key" });
1111+
const events = [
1112+
{ name: "new_token", kwargs: { token: "chunk1" }, time: "t1" },
1113+
{ name: "new_token", kwargs: { token: "chunk2" }, time: "t2" },
1114+
{ name: "new_token", kwargs: { token: "chunk3" }, time: "t3" },
1115+
];
1116+
1117+
const filtered = (client as any)._filterNewTokenEvents(events);
1118+
1119+
expect(filtered).toHaveLength(3);
1120+
filtered.forEach((event: any) => {
1121+
expect(event.kwargs).toBeUndefined();
1122+
expect(event.name).toBe("new_token");
1123+
});
1124+
});
1125+
});
10321126
});

js/src/tests/wrapped_anthropic.int.test.ts

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -143,8 +143,7 @@ describe.skip("Requires Anthropic API key", () => {
143143
expect(tokenEvents.length).toBeGreaterThan(0);
144144
tokenEvents.forEach((event: any) => {
145145
expect(event.name).toBe("new_token");
146-
expect(event.kwargs).toBeDefined();
147-
expect(event.kwargs.token).toBeDefined();
146+
expect(event.kwargs).toBeUndefined();
148147
expect(event.time).toBeDefined();
149148
});
150149

js/src/tests/wrapped_openai.int.test.ts

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -111,8 +111,7 @@ test("chat.completions", async () => {
111111
expect(tokenEvents.length).toBeGreaterThan(0);
112112
tokenEvents.forEach((event: any) => {
113113
expect(event.name).toBe("new_token");
114-
expect(event.kwargs).toBeDefined();
115-
expect(event.kwargs.token).toBeDefined();
114+
expect(event.kwargs).toBeUndefined();
116115
expect(event.time).toBeDefined();
117116
});
118117

@@ -368,8 +367,7 @@ test("chat completions with tool calling", async () => {
368367
expect(tokenEvents.length).toBeGreaterThan(0);
369368
tokenEvents.forEach((event: any) => {
370369
expect(event.name).toBe("new_token");
371-
expect(event.kwargs).toBeDefined();
372-
expect(event.kwargs.token).toBeDefined();
370+
expect(event.kwargs).toBeUndefined();
373371
expect(event.time).toBeDefined();
374372
});
375373

python/langsmith/client.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1946,6 +1946,8 @@ def _run_transform(
19461946
if copy:
19471947
run_create["outputs"] = ls_utils.deepish_copy(run_create["outputs"])
19481948
run_create["outputs"] = self._hide_run_outputs(run_create["outputs"])
1949+
if "events" in run_create and run_create["events"] is not None:
1950+
run_create["events"] = self._filter_new_token_events(run_create["events"])
19491951
# Hide metadata in extra if present
19501952
if "extra" in run_create and isinstance(run_create["extra"], dict):
19511953
extra = run_create["extra"]
@@ -2380,6 +2382,29 @@ def _hide_run_metadata(self, metadata: dict) -> dict:
23802382
return metadata
23812383
return self._hide_metadata(metadata)
23822384

2385+
@staticmethod
2386+
def _filter_new_token_events(
2387+
events: Optional[Sequence[dict]],
2388+
) -> Optional[list[dict]]:
2389+
"""Filter content from new_token events.
2390+
2391+
This prevents streaming LLM output from being uploaded via events.
Events whose "name" is "new_token" are replaced by shallow copies without
the "kwargs" key; all other events are returned as the same dict objects.
The input sequence and its dicts are not modified.
2392+
2393+
Args:
2394+
events: The events to filter. May be None or empty.
2395+
2396+
Returns:
2397+
The filtered events with kwargs removed from new_token events.
A falsy input (None or an empty sequence) is returned as-is; otherwise
a new list with one entry per input event, in the same order.
2398+
"""
2399+
# Nothing to filter: hand back None / the empty sequence untouched.
if not events:
2400+
return events  # type: ignore[return-value]
2401+
# Rebuild only new_token events (dropping "kwargs"); pass the rest through
# by reference.
return [
2402+
{k: v for k, v in event.items() if k != "kwargs"}
2403+
if event.get("name") == "new_token"
2404+
else event
2405+
for event in events
2406+
]
2407+
23832408
def _should_flush_run_ops_buffer(self) -> bool:
23842409
"""Check if the run ops buffer should be flushed based on size or time."""
23852410
if not self._run_ops_buffer:
@@ -3290,7 +3315,7 @@ def update_run(
32903315
outputs = ls_utils.deepish_copy(outputs)
32913316
data["outputs"] = self._hide_run_outputs(outputs)
32923317
if events is not None:
3293-
data["events"] = events
3318+
data["events"] = self._filter_new_token_events(events)
32943319
if data["extra"]:
32953320
self._insert_runtime_env([data])
32963321
if metadata := data["extra"].get("metadata"):

python/tests/unit_tests/test_client.py

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5508,3 +5508,88 @@ def test_env_var_not_set_leaves_dir_none(self, monkeypatch):
55085508
)
55095509
assert client._failed_traces_dir is None
55105510
_clear_env_cache()
5511+
5512+
def test_filter_new_token_events_strips_kwargs(self):
5513+
"""Test that _filter_new_token_events strips kwargs from new_token events."""
5514+
client = Client(api_url="http://localhost:1984", api_key="test")
5515+
# One new_token event (kwargs must be dropped) and one other event
# (kwargs must survive).
events = [
5516+
{
5517+
"name": "new_token",
5518+
"kwargs": {"token": "sensitive streaming data"},
5519+
"time": "2024-01-01T00:00:00Z",
5520+
},
5521+
{
5522+
"name": "other_event",
5523+
"kwargs": {"data": "keep this"},
5524+
"time": "2024-01-01T00:00:01Z",
5525+
},
5526+
]
5527+
5528+
filtered = client._filter_new_token_events(events)
5529+
5530+
assert filtered[0]["name"] == "new_token"
5531+
assert filtered[0]["time"] == "2024-01-01T00:00:00Z"
5532+
assert "kwargs" not in filtered[0]
5533+
# The non-new_token event keeps its kwargs.
assert filtered[1]["kwargs"] == {"data": "keep this"}
5534+
5535+
def test_filter_new_token_events_empty_events(self):
5536+
"""Test that _filter_new_token_events handles empty events list."""
5537+
client = Client(api_url="http://localhost:1984", api_key="test")
5538+
# Boundary case: an empty list must come back equal to an empty list.
filtered = client._filter_new_token_events([])
5539+
assert filtered == []
5540+
5541+
def test_filter_new_token_events_none_events(self):
5542+
"""Test that _filter_new_token_events handles None events."""
5543+
client = Client(api_url="http://localhost:1984", api_key="test")
5544+
# Boundary case: None must be returned as None, not converted to a list.
filtered = client._filter_new_token_events(None)
5545+
assert filtered is None
5546+
5547+
def test_filter_new_token_events_without_kwargs(self):
5548+
"""Test that _filter_new_token_events handles events without kwargs."""
5549+
client = Client(api_url="http://localhost:1984", api_key="test")
5550+
# Neither event has a "kwargs" key, so both must come out equal to the input.
events = [
5551+
{"name": "new_token", "time": "2024-01-01T00:00:00Z"},
5552+
{"name": "other_event", "time": "2024-01-01T00:00:01Z"},
5553+
]
5554+
5555+
filtered = client._filter_new_token_events(events)
5556+
5557+
assert filtered[0] == {"name": "new_token", "time": "2024-01-01T00:00:00Z"}
5558+
assert filtered[1] == {"name": "other_event", "time": "2024-01-01T00:00:01Z"}
5559+
5560+
def test_filter_new_token_events_preserves_other_properties(self):
5561+
"""Test that _filter_new_token_events preserves other event properties."""
5562+
client = Client(api_url="http://localhost:1984", api_key="test")
5563+
# A new_token event carrying extra keys besides "kwargs"; only "kwargs"
# should be removed.
events = [
5564+
{
5565+
"name": "new_token",
5566+
"kwargs": {"token": "data"},
5567+
"time": "2024-01-01T00:00:00Z",
5568+
"message": "token received",
5569+
"custom_field": "custom_value",
5570+
}
5571+
]
5572+
5573+
filtered = client._filter_new_token_events(events)
5574+
5575+
assert filtered[0]["name"] == "new_token"
5576+
assert filtered[0]["time"] == "2024-01-01T00:00:00Z"
5577+
assert filtered[0]["message"] == "token received"
5578+
assert filtered[0]["custom_field"] == "custom_value"
5579+
assert "kwargs" not in filtered[0]
5580+
5581+
def test_filter_new_token_events_multiple_new_token_events(self):
5582+
"""Test that _filter_new_token_events filters multiple new_token events."""
5583+
client = Client(api_url="http://localhost:1984", api_key="test")
5584+
# Three consecutive new_token events, each carrying a token chunk in "kwargs".
events = [
5585+
{"name": "new_token", "kwargs": {"token": "chunk1"}, "time": "t1"},
5586+
{"name": "new_token", "kwargs": {"token": "chunk2"}, "time": "t2"},
5587+
{"name": "new_token", "kwargs": {"token": "chunk3"}, "time": "t3"},
5588+
]
5589+
5590+
filtered = client._filter_new_token_events(events)
5591+
5592+
# No event is dropped; each one is kept but stripped of "kwargs".
assert len(filtered) == 3
5593+
for event in filtered:
5594+
assert "kwargs" not in event
5595+
assert event["name"] == "new_token"

0 commit comments

Comments
 (0)