Skip to content

Commit 648aca6

Browse files
Thread original chunk data through engine pipeline (#4780)
* [secret-storage] Thread original chunk data through engine pipeline

  Adds OriginalData/ChunkData fields to preserve pre-decode source data through the scan pipeline:

  1. Chunk.OriginalData: captures chunk.Data before iterativeDecode
  2. engine.go: sets chunk.OriginalData = chunk.Data before decode
  3. ResultWithMetadata.ChunkData: populated by CopyMetadata from OriginalData (falls back to Data when nil)

  This enables downstream consumers (e.g. the dispatcher in thog) to access the original source data for secret storage encryption.

* Update TestChunkSize for OriginalData field addition

  Chunk struct grew from 80 to 104 bytes with the OriginalData []byte slice header (24 bytes). Field placement is already optimal (adjacent to Data []byte).

* fix: preserve OriginalData field in EscapedUnicode decoder

  The EscapedUnicode decoder constructed a new sources.Chunk by manually copying fields but omitted OriginalData. This caused CopyMetadata to fall back to the decoded Data instead of the original pre-decode content, defeating the purpose of preserving original chunk data for secret storage encryption.

* Address PR review feedback: use testify/assert, add nil-guard comment, remove stale alignment comment

---------

Co-authored-by: Cursor Agent <cursoragent@cursor.com>
1 parent 8df943f commit 648aca6

File tree

6 files changed

+60
-7
lines changed

6 files changed

+60
-7
lines changed

pkg/decoders/escaped_unicode.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ func (d *EscapedUnicode) FromChunk(chunk *sources.Chunk) *DecodableChunk {
105105
DecoderType: d.Type(),
106106
Chunk: &sources.Chunk{
107107
Data: chunkData,
108+
OriginalData: chunk.OriginalData,
108109
SourceName: chunk.SourceName,
109110
SourceID: chunk.SourceID,
110111
JobID: chunk.JobID,
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package detectors
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
8+
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
9+
)
10+
11+
// TestCopyMetadata_ChunkDataFromOriginalData checks that CopyMetadata fills
// ResultWithMetadata.ChunkData from chunk.OriginalData when that field is set,
// even though chunk.Data holds different content.
func TestCopyMetadata_ChunkDataFromOriginalData(t *testing.T) {
12+
chunk := &sources.Chunk{
13+
Data: []byte("decoded-data"),
14+
OriginalData: []byte("original-source-data"),
15+
SourceName: "test-source",
16+
}
17+
result := Result{
18+
DetectorType: 1,
19+
Raw: []byte("secret"),
20+
}
21+
22+
rwm := CopyMetadata(chunk, result)
23+
24+
assert.Equal(t, "original-source-data", string(rwm.ChunkData))
25+
}
26+
27+
// TestCopyMetadata_ChunkDataFallsBackToData checks that CopyMetadata uses
// chunk.Data for ResultWithMetadata.ChunkData when chunk.OriginalData is left
// unset (nil).
func TestCopyMetadata_ChunkDataFallsBackToData(t *testing.T) {
28+
chunk := &sources.Chunk{
29+
Data: []byte("only-data"),
30+
SourceName: "test-source",
31+
}
32+
result := Result{
33+
DetectorType: 1,
34+
Raw: []byte("secret"),
35+
}
36+
37+
rwm := CopyMetadata(chunk, result)
38+
39+
assert.Equal(t, "only-data", string(rwm.ChunkData))
40+
}

pkg/detectors/detectors.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,19 @@ type ResultWithMetadata struct {
217217
DetectorDescription string
218218
// DecoderType is the type of decoder that was used to generate this result's data.
219219
DecoderType detectorspb.DecoderType
220+
// ChunkData holds the original pre-decode source chunk data, preserved
221+
// for secret storage encryption in the dispatcher.
222+
ChunkData []byte
220223
}
221224

222225
// CopyMetadata returns a detector result with included metadata from the source chunk.
223226
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
227+
// OriginalData may be nil when CopyMetadata is called outside the engine
228+
// pipeline (e.g., in tests or external consumers that construct chunks directly).
229+
chunkData := chunk.OriginalData
230+
if chunkData == nil {
231+
chunkData = chunk.Data
232+
}
224233
return ResultWithMetadata{
225234
SourceMetadata: chunk.SourceMetadata,
226235
SourceID: chunk.SourceID,
@@ -229,6 +238,7 @@ func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
229238
SourceType: chunk.SourceType,
230239
SourceName: chunk.SourceName,
231240
Result: result,
241+
ChunkData: chunkData,
232242
}
233243
}
234244

pkg/engine/engine.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,7 @@ func (e *Engine) scannerWorker(ctx context.Context) {
854854
startTime := time.Now()
855855
sourceVerify := chunk.SourceVerify
856856

857+
chunk.OriginalData = chunk.Data
857858
decoded := iterativeDecode(chunk, e.decoders, e.maxDecodeDepth)
858859

859860
for _, d := range decoded {

pkg/sources/sources.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,13 @@ type (
2121
)
2222

2323
// Chunk contains data to be decoded and scanned along with context on where it came from.
24-
//
25-
// **Important:** The order of the fields in this struct is specifically designed to optimize
26-
// struct alignment and minimize memory usage. Do not change the field order without carefully considering
27-
// the potential impact on memory consumption.
28-
// Ex: https://go.dev/play/p/Azf4a7O-DhC
2924
type Chunk struct {
3025
// Data is the data to decode and scan.
3126
Data []byte
27+
// OriginalData holds the pre-decode source data, preserved for secret
28+
// storage. Set before iterative decoding so it retains the original
29+
// content even after Data is replaced with decoded forms.
30+
OriginalData []byte
3231

3332
// SourceName is the name of the Source that produced the chunk.
3433
SourceName string

pkg/sources/sources_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ import (
77
"github.com/stretchr/testify/assert"
88
)
99

10-
// TestChunkSize ensures that the Chunk struct does not exceed 80 bytes.
10+
// TestChunkSize ensures that the Chunk struct is exactly 104 bytes.
11+
// Size increased from 80 to 104 with the addition of OriginalData []byte
12+
// (24-byte slice header) for secret storage chunk threading.
1113
func TestChunkSize(t *testing.T) {
1214
t.Parallel()
13-
assert.Equal(t, unsafe.Sizeof(Chunk{}), uintptr(80), "Chunk struct size exceeds 80 bytes")
15+
assert.Equal(t, unsafe.Sizeof(Chunk{}), uintptr(104), "Chunk struct size exceeds 104 bytes")
1416
}

0 commit comments

Comments
 (0)