micasa-dev
diff --git a/‎cmd/micasa/main.go‎
Lines changed: 2 additions & 0 deletions b/‎cmd/micasa/main.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎internal/app/extraction.go‎
Lines changed: 31 additions & 6 deletions b/‎internal/app/extraction.go‎
Lines changed: 31 additions & 6 deletions
diff --git a/‎internal/app/extraction_test.go‎
Lines changed: 100 additions & 0 deletions b/‎internal/app/extraction_test.go‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎internal/app/model.go‎
Lines changed: 2 additions & 0 deletions b/‎internal/app/model.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎internal/app/types.go‎
Lines changed: 10 additions & 2 deletions b/‎internal/app/types.go‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎internal/config/config.go‎
Lines changed: 40 additions & 0 deletions b/‎internal/config/config.go‎
Lines changed: 40 additions & 0 deletions
@@ -173,6 +173,8 @@ func (cmd *runCmd) Run() error {
 		extractors,
 		cfg.Extraction.IsEnabled(),
 		cfg.Extraction.LLMTimeoutDuration(),
+		cfg.Extraction.IsOCRTSV(),
+		cfg.Extraction.OCRConfThreshold(),
 	)
 
 	model, err := app.NewModel(store, opts)
 
@@ -514,12 +514,14 @@ func (m *Model) llmExtractCmd(ctx context.Context, ex *extractionLogState) tea.C
 			ex.llmCancelFn = cancel
 		}
 		messages := extract.BuildExtractionPrompt(extract.ExtractionPromptInput{
-			DocID:     ex.DocID,
-			Filename:  ex.Filename,
-			MIME:      ex.mime,
-			SizeBytes: int64(len(ex.fileData)),
-			Schema:    schemaCtx,
-			Sources:   ex.sources,
+			DocID:         ex.DocID,
+			Filename:      ex.Filename,
+			MIME:          ex.mime,
+			SizeBytes:     int64(len(ex.fileData)),
+			Schema:        schemaCtx,
+			Sources:       ex.sources,
+			SendTSV:       m.ex.ocrTSV,
+			ConfThreshold: m.ex.ocrConfThreshold,
 		})
 		ch, err := client.ChatStream(
 			llmCtx,
@@ -923,6 +925,18 @@ func (m *Model) commitShadowOperations(ex *extractionLogState, ops []extract.Ope
 	return nil
 }
 
+// toggleExtractionTSV inverts m.ex.ocrTSV, sets a "layout on"/"layout off" status message for the new
+// state, and reruns the LLM step so results with and without spatial layout can be compared.
+func (m *Model) toggleExtractionTSV() tea.Cmd {
+	m.ex.ocrTSV = !m.ex.ocrTSV // flip first so the status text below describes the new state
+	if m.ex.ocrTSV {
+		m.setStatusInfo("layout on")
+	} else {
+		m.setStatusInfo("layout off")
+	}
+	return m.rerunLLMExtraction() // presumably rebuilds the prompt with the new SendTSV value — confirm in rerunLLMExtraction
+}
+
 // rerunLLMExtraction resets the LLM step and re-runs it.
 func (m *Model) rerunLLMExtraction() tea.Cmd {
 	ex := m.ex.extraction
@@ -1074,6 +1088,10 @@ func (m *Model) handleExtractionPipelineKey(msg tea.KeyMsg) tea.Cmd {
 		if ex.Done && ex.hasLLM && ex.cursorStep() == stepLLM {
 			return m.activateExtractionModelPicker()
 		}
+	case keyT:
+		if ex.Done && ex.hasLLM {
+			return m.toggleExtractionTSV()
+		}
 	case keyA:
 		if ex.Done {
 			m.acceptExtraction()
@@ -1506,6 +1524,13 @@ func (m *Model) buildExtractionPipelineOverlay(
 			hints = append(hints, m.helpItem(keyX, "explore"))
 		}
 		if ex.Done {
+			if ex.hasLLM {
+				label := "layout on"
+				if m.ex.ocrTSV {
+					label = "layout off"
+				}
+				hints = append(hints, m.helpItem(keyT, label))
+			}
 			hints = append(hints, m.helpItem(keyA, "accept"), m.helpItem(keyEsc, "discard"))
 		} else {
 			hints = append(hints,
 
@@ -2831,3 +2831,103 @@ func TestAccept_DeferredDoc_WorksWithoutLLMStep(t *testing.T) {
 	require.NoError(t, err)
 	assert.Equal(t, "better ocr text", full.ExtractedText)
 }
+
+// --- TSV toggle ---
+
+func TestExtractionTSVToggle_TogglesOCRTSV(t *testing.T) { // t flips ocrTSV on and puts the LLM step back into running
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true // the key handler only acts on keyT once the extraction is done
+
+	assert.False(t, m.ex.ocrTSV, "ocrTSV should start false in test setup")
+
+	// Press t to toggle layout on.
+	sendExtractionKey(m, keyT)
+	assert.True(t, m.ex.ocrTSV, "t should toggle ocrTSV on")
+
+	// The toggle reruns the LLM step, so its status should be running again.
+	assert.Equal(t, stepRunning, ex.Steps[stepLLM].Status,
+		"LLM step should be rerunning after toggle")
+}
+
+func TestExtractionTSVToggle_TogglesOff(t *testing.T) { // t flips ocrTSV back off when it starts enabled
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true     // keyT is only handled for a finished extraction
+	m.ex.ocrTSV = true // start with layout enabled so the key press must disable it
+
+	sendExtractionKey(m, keyT)
+	assert.False(t, m.ex.ocrTSV, "t should toggle ocrTSV off")
+}
+
+func TestExtractionTSVToggle_IgnoredWhenNotDone(t *testing.T) { // t must not change ocrTSV while the pipeline is unfinished
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepRunning, // pipeline still in progress
+		stepLLM:     stepPending,
+	})
+
+	sendExtractionKey(m, keyT) // ex.Done is never set here; presumably it defaults to false — verify in newExtractionModel
+	assert.False(t, m.ex.ocrTSV, "t should be ignored when extraction is not done")
+}
+
+func TestExtractionTSVToggle_IgnoredWithoutLLM(t *testing.T) { // t must not change ocrTSV when the extraction has no LLM step
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+	}) // no stepLLM entry; presumably this leaves ex.hasLLM false — confirm in newExtractionModel
+	ex := m.ex.extraction
+	ex.Done = true // done, so only the missing LLM step can block the toggle
+
+	sendExtractionKey(m, keyT)
+	assert.False(t, m.ex.ocrTSV, "t should be ignored when no LLM step")
+}
+
+func TestExtractionTSVToggle_StatusMessage(t *testing.T) { // each toggle sets a status message naming the new state
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true
+
+	sendExtractionKey(m, keyT)
+	assert.Contains(t, m.status.Text, "layout on") // ocrTSV started false, so the first press enables it
+
+	// Simulate the LLM rerun finishing; t is only handled once the extraction is done.
+	ex.Done = true
+	ex.Steps[stepLLM] = extractionStepInfo{Status: stepDone}
+
+	sendExtractionKey(m, keyT)
+	assert.Contains(t, m.status.Text, "layout off") // second press disables it again
+}
+
+func TestExtractionTSVToggle_HintShownInFooter(t *testing.T) { // the rendered view advertises the layout toggle for a finished LLM extraction
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true // the hint is only appended in the ex.Done branch of the overlay
+	m.width = 120  // presumably View needs non-zero dimensions to render the overlay — TODO confirm
+	m.height = 40
+
+	view := m.View()
+	assert.Contains(t, view, "layout", "footer should show layout hint when done with LLM") // matches either hint label, "layout on" or "layout off"
+}
@@ -273,6 +273,8 @@ func NewModel(store *data.Store, options Options) (*Model, error) {
 			extractionTimeout:   options.ExtractionConfig.Timeout,
 			extractionThinking:  options.ExtractionConfig.Thinking,
 			extractionEnabled:   options.ExtractionConfig.Enabled,
+			ocrTSV:              options.ExtractionConfig.OCRTSV,
+			ocrConfThreshold:    options.ExtractionConfig.OCRConfThreshold,
 			extractors:          options.ExtractionConfig.Extractors,
 			llmInferenceTimeout: options.ExtractionConfig.LLMInferenceTimeout,
 		},
 
@@ -94,6 +94,8 @@ type extractState struct {
 	extractionTimeout   time.Duration
 	extractionThinking  string
 	extractionEnabled   bool
+	ocrTSV              bool
+	ocrConfThreshold    int
 	extractionClient    *llm.Client
 	extractors          []extract.Extractor
 	extractionReady     bool
@@ -292,8 +294,10 @@ type extractionConfig struct {
 	Thinking            string // reasoning effort level
 	LLMInferenceTimeout time.Duration
 
-	Extractors []extract.Extractor // configured extractors; nil = defaults
-	Enabled    bool                // LLM extraction enabled
+	Extractors       []extract.Extractor // configured extractors; nil = defaults
+	Enabled          bool                // LLM extraction enabled
+	OCRTSV           bool                // send spatial layout annotations to LLM
+	OCRConfThreshold int                 // confidence threshold for spatial annotations
 }
 
 // SetExtraction configures the extraction pipeline on the Options.
@@ -304,6 +308,8 @@ func (o *Options) SetExtraction(
 	extractors []extract.Extractor,
 	enabled bool,
 	llmInferenceTimeout time.Duration,
+	ocrTSV bool,
+	ocrConfThreshold int,
 ) {
 	o.ExtractionConfig = extractionConfig{
 		Provider:            provider,
@@ -315,6 +321,8 @@ func (o *Options) SetExtraction(
 		LLMInferenceTimeout: llmInferenceTimeout,
 		Extractors:          extractors,
 		Enabled:             enabled,
+		OCRTSV:              ocrTSV,
+		OCRConfThreshold:    ocrConfThreshold,
 	}
 }
 
 
@@ -281,6 +281,18 @@ type Extraction struct {
 	// Supported values: none, low, medium, high, auto.
 	// Empty string = don't send (server default). Default: empty.
 	Thinking string `toml:"thinking,omitempty" env:"MICASA_EXTRACTION_THINKING"`
+
+	// OCRTSV sends spatial layout annotations (line-level bounding boxes
+	// and confidence scores) from tesseract OCR to the LLM alongside text.
+	// This helps extraction accuracy for invoices and forms with tabular
+	// data, at ~2x token overhead. Default: true.
+	OCRTSV *bool `toml:"ocr_tsv,omitempty" env:"MICASA_EXTRACTION_OCR_TSV"`
+
+	// OCRConfThresholdVal is the confidence threshold (0-100) below which
+	// OCR confidence annotations are included in spatial layout output.
+	// Lines with min confidence >= this value omit the score to save tokens.
+	// Set to 0 to never show confidence. Default: 70.
+	OCRConfThresholdVal *int `toml:"ocr_conf_threshold,omitempty" env:"MICASA_EXTRACTION_OCR_CONF_THRESHOLD"`
 }
 
 // IsEnabled returns whether LLM extraction is enabled. Defaults to true
@@ -324,6 +336,24 @@ func (e Extraction) ThinkingLevel() string {
 	return e.Thinking
 }
 
+// IsOCRTSV reports whether spatial layout annotations from tesseract OCR should be
+// sent to the LLM alongside text: the value of OCRTSV when non-nil, otherwise true.
+func (e Extraction) IsOCRTSV() bool {
+	if e.OCRTSV != nil { // a non-nil pointer carries an explicit setting, including an explicit false
+		return *e.OCRTSV
+	}
+	return true // nil pointer: fall back to the default
+}
+
+// OCRConfThreshold returns the confidence threshold below which OCR confidence annotations
+// appear in spatial output: the value of OCRConfThresholdVal when non-nil, otherwise 70.
+func (e Extraction) OCRConfThreshold() int {
+	if e.OCRConfThresholdVal != nil {
+		return *e.OCRConfThresholdVal // returned as-is; no 0-100 range check happens here
+	}
+	return 70 // nil pointer: fall back to the default
+}
+
 // ResolvedModel returns the extraction model, falling back to the given
 // chat model if no extraction-specific model is configured.
 func (e Extraction) ResolvedModel(chatModel string) string {
@@ -1091,6 +1121,16 @@ model = "` + DefaultModel + `"
 # When disabled, no structured data is extracted from documents.
 # enabled = true
 
+# Send spatial layout annotations (line-level bounding boxes) from tesseract
+# OCR to the LLM alongside text. Improves extraction accuracy for invoices
+# and forms with tabular data, at ~2x token overhead. Default: true.
+# ocr_tsv = true
+
+# Confidence threshold (0-100) for spatial annotations. Lines with OCR
+# confidence below this threshold include a confidence score; lines at or
+# above it omit the score to save tokens. Set to 0 to never show confidence.
+# Default: 70.
+# ocr_conf_threshold = 70
+
 [locale]
 # ISO 4217 currency code. Stored in the database on first run; after that the
 # database value is authoritative. Override: MICASA_CURRENCY env var.
Original file line number	Diff line number	Diff line change
`@@ -173,6 +173,8 @@ func (cmd *runCmd) Run() error {`
`173`	`173`	`extractors,`
`174`	`174`	`cfg.Extraction.IsEnabled(),`
`175`	`175`	`cfg.Extraction.LLMTimeoutDuration(),`
	`176`	`+ cfg.Extraction.IsOCRTSV(),`
	`177`	`+ cfg.Extraction.OCRConfThreshold(),`
`176`	`178`	`)`
`177`	`179`
`178`	`180`	`model, err := app.NewModel(store, opts)`