micasa-dev · cpcloud · Mar 10, 2026 · Mar 9, 2026 · Mar 9, 2026 · Mar 10, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -203,6 +203,10 @@ details; do not duplicate that detail here.
 
 ### Git and CI
 
+- **Reply to PR review comments**: After addressing a PR review comment,
+  reply to the comment on GitHub (via `gh api .../replies`) explaining
+  how it was addressed (commit hash, what changed, tests added). Do this
+  for every comment, not just some.
 - **Never use `git commit --no-verify`**: No exceptions. Fix every hook
   failure before committing.
 - **Treat all linter/compiler warnings as bugs**: Fix all warnings from

diff --git a/cmd/micasa/main.go b/cmd/micasa/main.go
@@ -163,7 +163,6 @@ func (cmd *runCmd) Run() error {
 		cfg.Extraction.MaxPages,
 		0, // pdftotext uses its own internal default timeout (30s)
 		cfg.Extraction.IsOCREnabled(),
-		cfg.Extraction.OCR.ConfidenceThreshold,
 	)
 	opts.SetExtraction(
 		exCfg.Provider,
@@ -175,6 +174,8 @@ func (cmd *runCmd) Run() error {
 		extractors,
 		cfg.Extraction.IsEnabled(),
 		cfg.Extraction.LLMTimeoutDuration(),
+		cfg.Extraction.IsOCRTSV(),
+		cfg.Extraction.OCRConfThreshold(),
 	)
 
 	model, err := app.NewModel(store, opts)

diff --git a/internal/app/extraction.go b/internal/app/extraction.go
@@ -517,12 +517,14 @@ func (m *Model) llmExtractCmd(ctx context.Context, ex *extractionLogState) tea.C
 			ex.llmCancelFn = cancel
 		}
 		messages := extract.BuildExtractionPrompt(extract.ExtractionPromptInput{
-			DocID:     ex.DocID,
-			Filename:  ex.Filename,
-			MIME:      ex.mime,
-			SizeBytes: int64(len(ex.fileData)),
-			Schema:    schemaCtx,
-			Sources:   ex.sources,
+			DocID:         ex.DocID,
+			Filename:      ex.Filename,
+			MIME:          ex.mime,
+			SizeBytes:     int64(len(ex.fileData)),
+			Schema:        schemaCtx,
+			Sources:       ex.sources,
+			SendTSV:       m.ex.ocrTSV,
+			ConfThreshold: m.ex.ocrConfThreshold,
 		})
 		ch, err := client.ChatStream(
 			llmCtx,
@@ -926,6 +928,18 @@ func (m *Model) commitShadowOperations(ex *extractionLogState, ops []extract.Ope
 	return nil
 }
 
+// toggleExtractionTSV inverts the ocrTSV flag, sets an info status naming
+// the new layout state, and returns the command from rerunning the LLM step.
+func (m *Model) toggleExtractionTSV() tea.Cmd {
+	m.ex.ocrTSV = !m.ex.ocrTSV
+	status := "layout off"
+	if m.ex.ocrTSV {
+		status = "layout on"
+	}
+	m.setStatusInfo(status)
+	return m.rerunLLMExtraction()
+}
+
 // rerunLLMExtraction resets the LLM step and re-runs it.
 func (m *Model) rerunLLMExtraction() tea.Cmd {
 	ex := m.ex.extraction
@@ -1079,6 +1093,10 @@ func (m *Model) handleExtractionPipelineKey(msg tea.KeyMsg) tea.Cmd {
 		if ex.Done && ex.hasLLM && ex.cursorStep() == stepLLM {
 			return m.activateExtractionModelPicker()
 		}
+	case keyT:
+		if ex.Done && ex.hasLLM {
+			return m.toggleExtractionTSV()
+		}
 	case keyA:
 		if ex.Done {
 			m.acceptExtraction()
@@ -1511,6 +1529,13 @@ func (m *Model) buildExtractionPipelineOverlay(
 			hints = append(hints, m.helpItem(keyX, "explore"))
 		}
 		if ex.Done {
+			if ex.hasLLM {
+				label := "layout on"
+				if m.ex.ocrTSV {
+					label = "layout off"
+				}
+				hints = append(hints, m.helpItem(keyT, label))
+			}
 			hints = append(hints, m.helpItem(keyA, "accept"), m.helpItem(keyEsc, "discard"))
 		} else {
 			hints = append(hints,

diff --git a/internal/app/extraction_test.go b/internal/app/extraction_test.go
@@ -2842,3 +2842,103 @@ func TestAccept_DeferredDoc_WorksWithoutLLMStep(t *testing.T) {
 	require.NoError(t, err)
 	assert.Equal(t, "better ocr text", full.ExtractedText)
 }
+
+// --- TSV toggle ---
+
+// TestExtractionTSVToggle_TogglesOCRTSV checks that pressing t on a finished
+// extraction enables ocrTSV and leaves the LLM step in the running state.
+func TestExtractionTSVToggle_TogglesOCRTSV(t *testing.T) {
+	t.Parallel()
+	steps := map[extractionStep]stepStatus{
+		stepText: stepDone, stepExtract: stepDone, stepLLM: stepDone,
+	}
+	m := newExtractionModel(t, steps)
+	state := m.ex.extraction
+	state.Done = true
+	assert.False(t, m.ex.ocrTSV, "ocrTSV should start false in test setup")
+
+	// The toggle key flips the layout setting on.
+	sendExtractionKey(m, keyT)
+	assert.True(t, m.ex.ocrTSV, "t should toggle ocrTSV on")
+
+	// After the toggle the LLM step is expected to be running again.
+	llmStatus := state.Steps[stepLLM].Status
+	assert.Equal(t, stepRunning, llmStatus, "LLM step should be rerunning after toggle")
+}
+
+// TestExtractionTSVToggle_TogglesOff checks that pressing t on a finished
+// extraction disables ocrTSV when it was enabled beforehand.
+func TestExtractionTSVToggle_TogglesOff(t *testing.T) {
+	t.Parallel()
+	steps := map[extractionStep]stepStatus{
+		stepText: stepDone, stepExtract: stepDone, stepLLM: stepDone,
+	}
+	m := newExtractionModel(t, steps)
+	m.ex.extraction.Done = true
+	m.ex.ocrTSV = true
+
+	sendExtractionKey(m, keyT)
+	assert.False(t, m.ex.ocrTSV, "t should toggle ocrTSV off")
+}
+
+// TestExtractionTSVToggle_IgnoredWhenNotDone checks that t leaves ocrTSV
+// untouched when the test has not marked the extraction as Done.
+func TestExtractionTSVToggle_IgnoredWhenNotDone(t *testing.T) {
+	t.Parallel()
+	steps := map[extractionStep]stepStatus{
+		stepText: stepDone, stepExtract: stepRunning, stepLLM: stepPending,
+	}
+	m := newExtractionModel(t, steps)
+	sendExtractionKey(m, keyT)
+	assert.False(t, m.ex.ocrTSV, "t should be ignored when extraction is not done")
+}
+
+// TestExtractionTSVToggle_IgnoredWithoutLLM checks that t leaves ocrTSV
+// untouched on a finished extraction whose step map has no LLM entry.
+func TestExtractionTSVToggle_IgnoredWithoutLLM(t *testing.T) {
+	t.Parallel()
+	// stepLLM is deliberately absent from the step map.
+	steps := map[extractionStep]stepStatus{stepText: stepDone, stepExtract: stepDone}
+	m := newExtractionModel(t, steps)
+	m.ex.extraction.Done = true
+
+	sendExtractionKey(m, keyT)
+	assert.False(t, m.ex.ocrTSV, "t should be ignored when no LLM step")
+}
+
+// TestExtractionTSVToggle_StatusMessage checks that the status text names the
+// new layout state after each press of t: first "layout on", then "layout off".
+func TestExtractionTSVToggle_StatusMessage(t *testing.T) {
+	t.Parallel()
+	steps := map[extractionStep]stepStatus{
+		stepText: stepDone, stepExtract: stepDone, stepLLM: stepDone,
+	}
+	m := newExtractionModel(t, steps)
+	state := m.ex.extraction
+	state.Done = true
+
+	sendExtractionKey(m, keyT)
+	assert.Contains(t, m.status.Text, "layout on")
+
+	// Put the state back to "finished" so the second key press is accepted.
+	state.Done = true
+	state.Steps[stepLLM] = extractionStepInfo{Status: stepDone}
+	sendExtractionKey(m, keyT)
+	assert.Contains(t, m.status.Text, "layout off")
+}
+
+// TestExtractionTSVToggle_HintShownInFooter checks that the rendered view of a
+// finished extraction with an LLM step mentions the layout toggle.
+func TestExtractionTSVToggle_HintShownInFooter(t *testing.T) {
+	t.Parallel()
+	steps := map[extractionStep]stepStatus{
+		stepText: stepDone, stepExtract: stepDone, stepLLM: stepDone,
+	}
+	m := newExtractionModel(t, steps)
+	m.ex.extraction.Done = true
+	// Dimensions are assigned before the view is rendered.
+	m.width, m.height = 120, 40
+
+	rendered := m.View()
+	assert.Contains(t, rendered, "layout", "footer should show layout hint when done with LLM")
+}
diff --git a/internal/app/model.go b/internal/app/model.go
@@ -273,6 +273,8 @@ func NewModel(store *data.Store, options Options) (*Model, error) {
 			extractionTimeout:   options.ExtractionConfig.Timeout,
 			extractionThinking:  options.ExtractionConfig.Thinking,
 			extractionEnabled:   options.ExtractionConfig.Enabled,
+			ocrTSV:              options.ExtractionConfig.OCRTSV,
+			ocrConfThreshold:    options.ExtractionConfig.OCRConfThreshold,
 			extractors:          options.ExtractionConfig.Extractors,
 			llmInferenceTimeout: options.ExtractionConfig.LLMInferenceTimeout,
 		},

diff --git a/internal/app/types.go b/internal/app/types.go
@@ -94,6 +94,8 @@ type extractState struct {
 	extractionTimeout   time.Duration
 	extractionThinking  string
 	extractionEnabled   bool
+	ocrTSV              bool
+	ocrConfThreshold    int
 	extractionClient    *llm.Client
 	extractors          []extract.Extractor
 	extractionReady     bool
@@ -292,8 +294,10 @@ type extractionConfig struct {
 	Thinking            string // reasoning effort level
 	LLMInferenceTimeout time.Duration
 
-	Extractors []extract.Extractor // configured extractors; nil = defaults
-	Enabled    bool                // LLM extraction enabled
+	Extractors       []extract.Extractor // configured extractors; nil = defaults
+	Enabled          bool                // LLM extraction enabled
+	OCRTSV           bool                // send spatial layout annotations to LLM
+	OCRConfThreshold int                 // confidence threshold for spatial annotations
 }
 
 // SetExtraction configures the extraction pipeline on the Options.
@@ -304,6 +308,8 @@ func (o *Options) SetExtraction(
 	extractors []extract.Extractor,
 	enabled bool,
 	llmInferenceTimeout time.Duration,
+	ocrTSV bool,
+	ocrConfThreshold int,
 ) {
 	o.ExtractionConfig = extractionConfig{
 		Provider:            provider,
@@ -315,6 +321,8 @@ func (o *Options) SetExtraction(
 		LLMInferenceTimeout: llmInferenceTimeout,
 		Extractors:          extractors,
 		Enabled:             enabled,
+		OCRTSV:              ocrTSV,
+		OCRConfThreshold:    ocrConfThreshold,
 	}
 }
 

diff --git a/internal/config/config.go b/internal/config/config.go
@@ -292,10 +292,17 @@ type OCR struct {
 	// When disabled, scanned pages and images produce no text. Default: true.
 	Enable *bool `toml:"enable,omitempty"`
 
-	// ConfidenceThreshold is the minimum tesseract word confidence (0-100)
-	// to keep in OCR output. Words below this threshold are dropped.
-	// 0 means no filtering (all words kept). Default: 0.
-	ConfidenceThreshold int `toml:"confidence_threshold"`
+	// TSV sends spatial layout annotations (line-level bounding boxes
+	// and confidence scores) from tesseract OCR to the LLM alongside text.
+	// This helps extraction accuracy for invoices and forms with tabular
+	// data, at ~2x token overhead. Default: true.
+	TSV *bool `toml:"tsv,omitempty"`
+
+	// ConfidenceThresholdVal is the confidence threshold (0-100) below
+	// which OCR confidence annotations are included in spatial layout
+	// output. Lines with min confidence >= this value omit the score to
+	// save tokens. Set to 0 to never show confidence. Default: 70.
+	ConfidenceThresholdVal *int `toml:"confidence_threshold,omitempty"`
 }
 
 // IsEnabled returns whether LLM extraction is enabled. Defaults to true
@@ -335,6 +342,24 @@ func (e Extraction) ThinkingLevel() string {
 	return e.Thinking
 }
 
+// IsOCRTSV reports whether spatial layout annotations from tesseract OCR
+// should be sent to the LLM alongside text. An unset TSV option means true.
+func (e Extraction) IsOCRTSV() bool {
+	if tsv := e.OCR.TSV; tsv != nil {
+		return *tsv
+	}
+	return true
+}
+
+// OCRConfThreshold returns the configured OCR confidence threshold for
+// spatial output, falling back to 70 when the option is unset.
+func (e Extraction) OCRConfThreshold() int {
+	if threshold := e.OCR.ConfidenceThresholdVal; threshold != nil {
+		return *threshold
+	}
+	return 70
+}
+
 // ResolvedModel returns the extraction model, falling back to the given
 // chat model if no extraction-specific model is configured.
 func (e Extraction) ResolvedModel(chatModel string) string {
@@ -530,10 +555,9 @@ func LoadFromPath(path string) (Config, error) {
 		)
 	}
 
-	if cfg.Extraction.OCR.ConfidenceThreshold < 0 || cfg.Extraction.OCR.ConfidenceThreshold > 100 {
+	if t := cfg.Extraction.OCRConfThreshold(); t < 0 || t > 100 {
 		return cfg, fmt.Errorf(
-			"extraction.ocr.confidence_threshold must be 0-100, got %d",
-			cfg.Extraction.OCR.ConfidenceThreshold,
+			"extraction.ocr.confidence_threshold must be 0-100, got %d", t,
 		)
 	}
 
@@ -1145,6 +1169,17 @@ model = "` + DefaultModel + `"
 # threshold are dropped. 0 = no filtering. Default: 0.
 # confidence_threshold = 0
 
+[extraction.ocr]
+# Send spatial layout annotations (line-level bounding boxes) from tesseract
+# OCR to the LLM alongside text. Improves extraction accuracy for invoices
+# and forms with tabular data, at ~2x token overhead. Default: true.
+# tsv = true
+
+# Confidence threshold (0-100) for spatial annotations. Lines with OCR
+# confidence below this threshold include a confidence score; lines at or
+# above it omit it to save tokens. Set to 0 to never show confidence. Default: 70.
+# confidence_threshold = 70
+
 [locale]
 # ISO 4217 currency code. Stored in the database on first run; after that the
 # database value is authoritative. Override: MICASA_LOCALE_CURRENCY env var.