micasa-dev
diff --git a/‎cmd/micasa/main.go‎
Lines changed: 2 additions & 0 deletions b/‎cmd/micasa/main.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎internal/app/extraction.go‎
Lines changed: 31 additions & 6 deletions b/‎internal/app/extraction.go‎
Lines changed: 31 additions & 6 deletions
diff --git a/‎internal/app/extraction_test.go‎
Lines changed: 100 additions & 0 deletions b/‎internal/app/extraction_test.go‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎internal/app/model.go‎
Lines changed: 2 additions & 0 deletions b/‎internal/app/model.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎internal/app/types.go‎
Lines changed: 10 additions & 2 deletions b/‎internal/app/types.go‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎internal/config/config.go‎
Lines changed: 47 additions & 0 deletions b/‎internal/config/config.go‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎internal/config/config_test.go‎
Lines changed: 84 additions & 6 deletions b/‎internal/config/config_test.go‎
Lines changed: 84 additions & 6 deletions
@@ -173,6 +173,8 @@ func (cmd *runCmd) Run() error {
 		extractors,
 		cfg.Extraction.IsEnabled(),
 		cfg.Extraction.LLMTimeoutDuration(),
+		cfg.Extraction.IsOCRTSV(),
+		cfg.Extraction.OCRConfThreshold(),
 	)
 
 	model, err := app.NewModel(store, opts)
 
@@ -517,12 +517,14 @@ func (m *Model) llmExtractCmd(ctx context.Context, ex *extractionLogState) tea.C
 			ex.llmCancelFn = cancel
 		}
 		messages := extract.BuildExtractionPrompt(extract.ExtractionPromptInput{
-			DocID:     ex.DocID,
-			Filename:  ex.Filename,
-			MIME:      ex.mime,
-			SizeBytes: int64(len(ex.fileData)),
-			Schema:    schemaCtx,
-			Sources:   ex.sources,
+			DocID:         ex.DocID,
+			Filename:      ex.Filename,
+			MIME:          ex.mime,
+			SizeBytes:     int64(len(ex.fileData)),
+			Schema:        schemaCtx,
+			Sources:       ex.sources,
+			SendTSV:       m.ex.ocrTSV,
+			ConfThreshold: m.ex.ocrConfThreshold,
 		})
 		ch, err := client.ChatStream(
 			llmCtx,
@@ -926,6 +928,18 @@ func (m *Model) commitShadowOperations(ex *extractionLogState, ops []extract.Ope
 	return nil
 }
 
+// toggleExtractionTSV inverts m.ex.ocrTSV, reports the new state through setStatusInfo,
+// and returns rerunLLMExtraction's command so output with and without layout can be compared.
+func (m *Model) toggleExtractionTSV() tea.Cmd {
+	m.ex.ocrTSV = !m.ex.ocrTSV
+	if m.ex.ocrTSV {
+		m.setStatusInfo("layout on") // status text names the state just entered
+	} else {
+		m.setStatusInfo("layout off")
+	}
+	return m.rerunLLMExtraction()
+}
+
 // rerunLLMExtraction resets the LLM step and re-runs it.
 func (m *Model) rerunLLMExtraction() tea.Cmd {
 	ex := m.ex.extraction
@@ -1079,6 +1093,10 @@ func (m *Model) handleExtractionPipelineKey(msg tea.KeyMsg) tea.Cmd {
 		if ex.Done && ex.hasLLM && ex.cursorStep() == stepLLM {
 			return m.activateExtractionModelPicker()
 		}
+	case keyT:
+		if ex.Done && ex.hasLLM {
+			return m.toggleExtractionTSV()
+		}
 	case keyA:
 		if ex.Done {
 			m.acceptExtraction()
@@ -1511,6 +1529,13 @@ func (m *Model) buildExtractionPipelineOverlay(
 			hints = append(hints, m.helpItem(keyX, "explore"))
 		}
 		if ex.Done {
+			if ex.hasLLM {
+				label := "layout on"
+				if m.ex.ocrTSV {
+					label = "layout off"
+				}
+				hints = append(hints, m.helpItem(keyT, label))
+			}
 			hints = append(hints, m.helpItem(keyA, "accept"), m.helpItem(keyEsc, "discard"))
 		} else {
 			hints = append(hints,
 
@@ -2842,3 +2842,103 @@ func TestAccept_DeferredDoc_WorksWithoutLLMStep(t *testing.T) {
 	require.NoError(t, err)
 	assert.Equal(t, "better ocr text", full.ExtractedText)
 }
+
+// --- TSV toggle ---
+
+func TestExtractionTSVToggle_TogglesOCRTSV(t *testing.T) {
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true // the key handler only acts on t once extraction is done
+
+	assert.False(t, m.ex.ocrTSV, "ocrTSV should start false in test setup")
+
+	// Press t once: starting from false, the flag must flip to true.
+	sendExtractionKey(m, keyT)
+	assert.True(t, m.ex.ocrTSV, "t should toggle ocrTSV on")
+
+	// The toggle reruns the LLM step, so its status should be running again.
+	assert.Equal(t, stepRunning, ex.Steps[stepLLM].Status,
+		"LLM step should be rerunning after toggle")
+}
+
+func TestExtractionTSVToggle_TogglesOff(t *testing.T) {
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true
+	m.ex.ocrTSV = true // start with layout enabled so the key press must switch it off
+
+	sendExtractionKey(m, keyT)
+	assert.False(t, m.ex.ocrTSV, "t should toggle ocrTSV off")
+}
+
+func TestExtractionTSVToggle_IgnoredWhenNotDone(t *testing.T) {
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepRunning,
+		stepLLM:     stepPending,
+	})
+
+	sendExtractionKey(m, keyT) // this test never sets ex.Done, so the handler should ignore the key
+	assert.False(t, m.ex.ocrTSV, "t should be ignored when extraction is not done")
+}
+
+func TestExtractionTSVToggle_IgnoredWithoutLLM(t *testing.T) {
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true
+
+	sendExtractionKey(m, keyT) // no stepLLM in the fixture; presumably hasLLM is false — confirm in newExtractionModel
+	assert.False(t, m.ex.ocrTSV, "t should be ignored when no LLM step")
+}
+
+func TestExtractionTSVToggle_StatusMessage(t *testing.T) {
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true
+
+	sendExtractionKey(m, keyT)
+	assert.Contains(t, m.status.Text, "layout on") // status reports the state just entered
+
+	// Mark the LLM step finished again: the handler only accepts t once extraction is done.
+	ex.Done = true
+	ex.Steps[stepLLM] = extractionStepInfo{Status: stepDone}
+
+	sendExtractionKey(m, keyT)
+	assert.Contains(t, m.status.Text, "layout off")
+}
+
+func TestExtractionTSVToggle_HintShownInFooter(t *testing.T) {
+	t.Parallel()
+	m := newExtractionModel(t, map[extractionStep]stepStatus{
+		stepText:    stepDone,
+		stepExtract: stepDone,
+		stepLLM:     stepDone,
+	})
+	ex := m.ex.extraction
+	ex.Done = true
+	m.width = 120 // NOTE(review): presumably View needs a non-zero size to render the footer — confirm
+	m.height = 40
+
+	view := m.View()
+	assert.Contains(t, view, "layout", "footer should show layout hint when done with LLM")
+}
@@ -273,6 +273,8 @@ func NewModel(store *data.Store, options Options) (*Model, error) {
 			extractionTimeout:   options.ExtractionConfig.Timeout,
 			extractionThinking:  options.ExtractionConfig.Thinking,
 			extractionEnabled:   options.ExtractionConfig.Enabled,
+			ocrTSV:              options.ExtractionConfig.OCRTSV,
+			ocrConfThreshold:    options.ExtractionConfig.OCRConfThreshold,
 			extractors:          options.ExtractionConfig.Extractors,
 			llmInferenceTimeout: options.ExtractionConfig.LLMInferenceTimeout,
 		},
 
@@ -94,6 +94,8 @@ type extractState struct {
 	extractionTimeout   time.Duration
 	extractionThinking  string
 	extractionEnabled   bool
+	ocrTSV              bool
+	ocrConfThreshold    int
 	extractionClient    *llm.Client
 	extractors          []extract.Extractor
 	extractionReady     bool
@@ -292,8 +294,10 @@ type extractionConfig struct {
 	Thinking            string // reasoning effort level
 	LLMInferenceTimeout time.Duration
 
-	Extractors []extract.Extractor // configured extractors; nil = defaults
-	Enabled    bool                // LLM extraction enabled
+	Extractors       []extract.Extractor // configured extractors; nil = defaults
+	Enabled          bool                // LLM extraction enabled
+	OCRTSV           bool                // send spatial layout annotations to LLM
+	OCRConfThreshold int                 // confidence threshold for spatial annotations
 }
 
 // SetExtraction configures the extraction pipeline on the Options.
@@ -304,6 +308,8 @@ func (o *Options) SetExtraction(
 	extractors []extract.Extractor,
 	enabled bool,
 	llmInferenceTimeout time.Duration,
+	ocrTSV bool,
+	ocrConfThreshold int,
 ) {
 	o.ExtractionConfig = extractionConfig{
 		Provider:            provider,
@@ -315,6 +321,8 @@ func (o *Options) SetExtraction(
 		LLMInferenceTimeout: llmInferenceTimeout,
 		Extractors:          extractors,
 		Enabled:             enabled,
+		OCRTSV:              ocrTSV,
+		OCRConfThreshold:    ocrConfThreshold,
 	}
 }
 
 
@@ -281,6 +281,24 @@ type Extraction struct {
 	// Supported values: none, low, medium, high, auto.
 	// Empty string = don't send (server default). Default: empty.
 	Thinking string `toml:"thinking,omitempty"`
+
+	// OCR holds settings for OCR spatial layout annotations sent to the LLM.
+	OCR ExtractionOCR `toml:"ocr" doc:"OCR spatial layout annotation settings."`
+}
+
+// ExtractionOCR holds OCR-specific extraction settings; a nil field means "not set".
+type ExtractionOCR struct {
+	// TSV sends spatial layout annotations (line-level bounding boxes
+	// and confidence scores) from tesseract OCR to the LLM alongside text.
+	// This helps extraction accuracy for invoices and forms with tabular
+	// data, at ~2x token overhead. Default: true (applied by IsOCRTSV when nil).
+	TSV *bool `toml:"tsv,omitempty"`
+
+	// ConfidenceThresholdVal is the confidence threshold (0-100) below which
+	// OCR confidence annotations are included in spatial layout output.
+	// Lines with min confidence >= this value omit the score to save tokens.
+	// Set to 0 to never show confidence. Default: 70 (applied by OCRConfThreshold when nil).
+	ConfidenceThresholdVal *int `toml:"confidence_threshold,omitempty"`
+}
 
 // IsEnabled returns whether LLM extraction is enabled. Defaults to true
@@ -324,6 +342,24 @@ func (e Extraction) ThinkingLevel() string {
 	return e.Thinking
 }
 
+// IsOCRTSV reports whether spatial layout annotations from tesseract OCR
+// should be sent to the LLM alongside text: the OCR.TSV value when set, otherwise true.
+func (e Extraction) IsOCRTSV() bool {
+	if e.OCR.TSV != nil {
+		return *e.OCR.TSV
+	}
+	return true // unset: layout annotations are on by default
+}
+
+// OCRConfThreshold returns the confidence threshold below which OCR confidence
+// annotations appear in spatial output: OCR.ConfidenceThresholdVal when set, otherwise 70.
+func (e Extraction) OCRConfThreshold() int {
+	if e.OCR.ConfidenceThresholdVal != nil {
+		return *e.OCR.ConfidenceThresholdVal // NOTE(review): returned as-is; confirm the 0-100 range is validated elsewhere
+	}
+	return 70
+}
+
 // ResolvedModel returns the extraction model, falling back to the given
 // chat model if no extraction-specific model is configured.
 func (e Extraction) ResolvedModel(chatModel string) string {
@@ -1133,6 +1169,17 @@ model = "` + DefaultModel + `"
 # When disabled, no structured data is extracted from documents.
 # enabled = true
 
+[extraction.ocr]
+# Send spatial layout annotations (line-level bounding boxes) from tesseract
+# OCR to the LLM alongside text. Improves extraction accuracy for invoices
+# and forms with tabular data, at ~2x token overhead. Default: true.
+# tsv = true
+
+# Confidence threshold (0-100) for spatial annotations. Lines with OCR
+# confidence below this threshold include a confidence score; lines above
+# omit it to save tokens. Set to 0 to never show confidence. Default: 70.
+# confidence_threshold = 70
+
 [locale]
 # ISO 4217 currency code. Stored in the database on first run; after that the
 # database value is authoritative. Override: MICASA_LOCALE_CURRENCY env var.
 
@@ -629,12 +629,14 @@ func TestEnvVars(t *testing.T) {
 		"MICASA_DOCUMENTS_CACHE_TTL_DAYS":  "documents.cache_ttl_days",
 		"MICASA_DOCUMENTS_FILE_PICKER_DIR": "documents.file_picker_dir",
 
-		"MICASA_EXTRACTION_MODEL":        "extraction.model",
-		"MICASA_EXTRACTION_MAX_PAGES":    "extraction.max_pages",
-		"MICASA_EXTRACTION_ENABLED":      "extraction.enabled",
-		"MICASA_EXTRACTION_TEXT_TIMEOUT": "extraction.text_timeout",
-		"MICASA_EXTRACTION_LLM_TIMEOUT":  "extraction.llm_timeout",
-		"MICASA_EXTRACTION_THINKING":     "extraction.thinking",
+		"MICASA_EXTRACTION_MODEL":                    "extraction.model",
+		"MICASA_EXTRACTION_MAX_PAGES":                "extraction.max_pages",
+		"MICASA_EXTRACTION_ENABLED":                  "extraction.enabled",
+		"MICASA_EXTRACTION_TEXT_TIMEOUT":             "extraction.text_timeout",
+		"MICASA_EXTRACTION_LLM_TIMEOUT":              "extraction.llm_timeout",
+		"MICASA_EXTRACTION_THINKING":                 "extraction.thinking",
+		"MICASA_EXTRACTION_OCR_TSV":                  "extraction.ocr.tsv",
+		"MICASA_EXTRACTION_OCR_CONFIDENCE_THRESHOLD": "extraction.ocr.confidence_threshold",
 
 		"MICASA_LOCALE_CURRENCY": "locale.currency",
 
@@ -1346,6 +1348,82 @@ func TestResolvedFilePickerDir_EmptyFallsBackToDownloadsOrCwd(t *testing.T) {
 	assert.NotEmpty(t, result)
 }
 
+// --- OCR TSV ---
+
+func TestExtractionOCRTSVDefaultTrue(t *testing.T) {
+	cfg, err := LoadFromPath(noConfig(t)) // presumably a path with no config file, so defaults apply — confirm noConfig
+	require.NoError(t, err)
+	assert.True(t, cfg.Extraction.IsOCRTSV(),
+		"OCR TSV should default to true")
+}
+
+func TestExtractionOCRTSVFromTOML(t *testing.T) {
+	path := writeConfig(t, "[extraction.ocr]\ntsv = true\n") // true is also the default; the tsv = false test proves the key is read
+	cfg, err := LoadFromPath(path)
+	require.NoError(t, err)
+	assert.True(t, cfg.Extraction.IsOCRTSV())
+}
+
+func TestExtractionOCRTSVFromTOMLFalse(t *testing.T) {
+	path := writeConfig(t, "[extraction.ocr]\ntsv = false\n")
+	cfg, err := LoadFromPath(path)
+	require.NoError(t, err)
+	assert.False(t, cfg.Extraction.IsOCRTSV()) // non-default value, so a pass shows the TOML key overrides the default
+}
+
+func TestExtractionOCRTSVFromEnv(t *testing.T) {
+	t.Setenv("MICASA_EXTRACTION_OCR_TSV", "false") // non-default: IsOCRTSV defaults to true, so only false proves the env var is read
+	cfg, err := LoadFromPath(noConfig(t))
+	require.NoError(t, err)
+	assert.False(t, cfg.Extraction.IsOCRTSV())
+}
+
+func TestExtractionOCRTSVEnvInvalidReturnsError(t *testing.T) {
+	t.Setenv("MICASA_EXTRACTION_OCR_TSV", "maybe") // neither true nor false, so loading must fail
+	_, err := LoadFromPath(noConfig(t))
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "MICASA_EXTRACTION_OCR_TSV") // the error must name the offending variable
+	assert.Contains(t, err.Error(), "expected true or false")
+}
+
+// --- OCR confidence threshold ---
+
+func TestExtractionOCRConfThresholdDefault70(t *testing.T) {
+	cfg, err := LoadFromPath(noConfig(t)) // presumably a path with no config file, so defaults apply — confirm noConfig
+	require.NoError(t, err)
+	assert.Equal(t, 70, cfg.Extraction.OCRConfThreshold(),
+		"OCR confidence threshold should default to 70")
+}
+
+func TestExtractionOCRConfThresholdFromTOML(t *testing.T) {
+	path := writeConfig(t, "[extraction.ocr]\nconfidence_threshold = 50\n") // 50 differs from the default of 70
+	cfg, err := LoadFromPath(path)
+	require.NoError(t, err)
+	assert.Equal(t, 50, cfg.Extraction.OCRConfThreshold())
+}
+
+func TestExtractionOCRConfThresholdFromTOMLZero(t *testing.T) {
+	path := writeConfig(t, "[extraction.ocr]\nconfidence_threshold = 0\n") // explicit 0 must not be treated as unset and replaced by 70
+	cfg, err := LoadFromPath(path)
+	require.NoError(t, err)
+	assert.Equal(t, 0, cfg.Extraction.OCRConfThreshold(),
+		"zero threshold should disable confidence annotations")
+}
+
+func TestExtractionOCRConfThresholdFromEnv(t *testing.T) {
+	t.Setenv("MICASA_EXTRACTION_OCR_CONFIDENCE_THRESHOLD", "80") // 80 differs from the default of 70, so a pass proves the env var is read
+	cfg, err := LoadFromPath(noConfig(t))
+	require.NoError(t, err)
+	assert.Equal(t, 80, cfg.Extraction.OCRConfThreshold())
+}
+
+func TestExtractionOCRConfThresholdEnvInvalidReturnsError(t *testing.T) {
+	t.Setenv("MICASA_EXTRACTION_OCR_CONFIDENCE_THRESHOLD", "high") // not an integer, so loading must fail
+	_, err := LoadFromPath(noConfig(t))
+	require.Error(t, err)
+	assert.Contains(t, err.Error(), "MICASA_EXTRACTION_OCR_CONFIDENCE_THRESHOLD") // the error must name the offending variable
+}
+
 func TestFilePickerDir_FromTOML(t *testing.T) {
 	t.Parallel()
 	dir := t.TempDir()
Original file line number	Diff line number	Diff line change
`@@ -173,6 +173,8 @@ func (cmd *runCmd) Run() error {`
`173`	`173`	`extractors,`
`174`	`174`	`cfg.Extraction.IsEnabled(),`
`175`	`175`	`cfg.Extraction.LLMTimeoutDuration(),`
	`176`	`+ cfg.Extraction.IsOCRTSV(),`
	`177`	`+ cfg.Extraction.OCRConfThreshold(),`
`176`	`178`	`)`
`177`	`179`
`178`	`180`	`model, err := app.NewModel(store, opts)`