micasa-dev
diff --git a/‎.claude/codebase/types.md‎
Lines changed: 3 additions & 2 deletions b/‎.claude/codebase/types.md‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎cmd/micasa/main.go‎
Lines changed: 3 additions & 1 deletion b/‎cmd/micasa/main.go‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/content/docs/reference/configuration.md‎
Lines changed: 25 additions & 19 deletions b/‎docs/content/docs/reference/configuration.md‎
Lines changed: 25 additions & 19 deletions
diff --git a/‎internal/config/config.go‎
Lines changed: 71 additions & 43 deletions b/‎internal/config/config.go‎
Lines changed: 71 additions & 43 deletions
@@ -120,12 +120,13 @@ Col* (e.g., ColID = "id", ColName = "name", ColDeletedAt = "deleted_at")
 - LLM (provider, model, baseURL, apiKey, timeout, thinking, extraContext)
   - Chat/Extraction overrides (LLMChatOverride, LLMExtractionOverride)
 - Documents (MaxFileSize ByteSize, CacheTTL Duration)
-- Extraction (MaxPages int, Enabled *bool, TextTimeout, LLMTimeout)
+- Extraction (MaxPages int, Enable *bool, LLMTimeout)
+  - OCR (Enable *bool, ConfidenceThreshold int)
 - Locale (Currency string)
 
 ### Defaults
 - Provider: "ollama", Model: "qwen3", BaseURL: "http://localhost:11434"
-- MaxPages: 20, CacheTTL: 30 days, TextTimeout: 30s, LLMTimeout: 5m
+- MaxPages: 0 (no limit), CacheTTL: 30 days, LLMTimeout: 5m
 
 ## LLM Types (internal/llm/)
 
 
@@ -161,7 +161,9 @@ func (cmd *runCmd) Run() error {
 	exCfg := cfg.LLM.ExtractionConfig()
 	extractors := extract.DefaultExtractors(
 		cfg.Extraction.MaxPages,
-		cfg.Extraction.TextTimeoutDuration(),
+		0, // pdftotext uses its own internal default timeout (30s)
+		cfg.Extraction.IsOCREnabled(),
+		cfg.Extraction.OCR.ConfidenceThreshold,
 	)
 	opts.SetExtraction(
 		exCfg.Provider,
 
@@ -111,18 +111,19 @@ You can always infer the env var name from the config key.
 | `MICASA_LLM_MODEL` | `qwen3` | `llm.model` | LLM model name |
 | `MICASA_LLM_API_KEY` | (empty) | `llm.api_key` | LLM API key for cloud providers |
 | `MICASA_LLM_EXTRA_CONTEXT` | (empty) | `llm.extra_context` | Custom context appended to LLM system prompts |
-| `MICASA_LLM_TIMEOUT` | `5s` | `llm.timeout` | LLM operation timeout |
+| `MICASA_LLM_TIMEOUT` | `5m` | `llm.timeout` | Max time for a single LLM response |
 | `MICASA_LLM_THINKING` | (unset) | `llm.thinking` | Enable model thinking for chat |
 | `MICASA_DOCUMENTS_MAX_FILE_SIZE` | `50 MiB` | `documents.max_file_size` | Max document import size |
 | `MICASA_DOCUMENTS_CACHE_TTL` | `30d` | `documents.cache_ttl` | Document cache lifetime |
 | `MICASA_DOCUMENTS_CACHE_TTL_DAYS` | -- | `documents.cache_ttl_days` | Deprecated; use `MICASA_DOCUMENTS_CACHE_TTL` |
 | `MICASA_DOCUMENTS_FILE_PICKER_DIR` | (Downloads) | `documents.file_picker_dir` | Starting directory for the file picker |
 | `MICASA_EXTRACTION_MODEL` | (chat model) | `extraction.model` | LLM model for document extraction |
-| `MICASA_EXTRACTION_ENABLED` | `true` | `extraction.enabled` | Enable/disable LLM extraction |
+| `MICASA_EXTRACTION_ENABLE` | `true` | `extraction.enable` | Enable/disable LLM extraction |
 | `MICASA_EXTRACTION_THINKING` | `false` | `extraction.thinking` | Enable model thinking for extraction |
-| `MICASA_EXTRACTION_TEXT_TIMEOUT` | `30s` | `extraction.text_timeout` | pdftotext timeout |
 | `MICASA_EXTRACTION_MAX_PAGES` | `0` | `extraction.max_pages` | Max pages to OCR per document (0 = no limit) |
 | `MICASA_EXTRACTION_LLM_TIMEOUT` | `5m` | `extraction.llm_timeout` | LLM extraction timeout |
+| `MICASA_EXTRACTION_OCR_ENABLE` | `true` | `extraction.ocr.enable` | Enable/disable OCR on documents |
+| `MICASA_EXTRACTION_OCR_CONFIDENCE_THRESHOLD` | `0` | `extraction.ocr.confidence_threshold` | Min tesseract confidence (0-100) |
 | `MICASA_LOCALE_CURRENCY` | (auto-detect) | `locale.currency` | ISO 4217 currency code (e.g. `USD`, `EUR`, `GBP`) |
 
 {{% details title="Deprecated env var names" closed="true" %}}
@@ -139,8 +140,8 @@ warning. They will be removed in a future release.
 | `MICASA_CURRENCY` | `MICASA_LOCALE_CURRENCY` |
 | `MICASA_EXTRACTION_MAX_EXTRACT_PAGES` | `MICASA_EXTRACTION_MAX_PAGES` |
 | `MICASA_MAX_EXTRACT_PAGES` | `MICASA_EXTRACTION_MAX_PAGES` |
-| `MICASA_TEXT_TIMEOUT` | `MICASA_EXTRACTION_TEXT_TIMEOUT` |
 | `MICASA_MAX_OCR_PAGES` | `MICASA_EXTRACTION_MAX_PAGES` |
+| `MICASA_EXTRACTION_ENABLED` | `MICASA_EXTRACTION_ENABLE` |
 | `MICASA_EXTRACTION_MODEL` | `MICASA_LLM_EXTRACTION_MODEL` |
 | `MICASA_EXTRACTION_THINKING` | `MICASA_LLM_EXTRACTION_THINKING` |
 
@@ -177,12 +178,12 @@ micasa   # uses llama3.3 instead of the default qwen3
 
 ### `MICASA_LLM_TIMEOUT`
 
-Sets the LLM timeout for quick operations (ping, model listing), overriding
-the config file value. Uses Go duration syntax:
+Sets the maximum time for a single LLM response (including streaming),
+overriding the config file value. Uses Go duration syntax:
 
 ```sh
-export MICASA_LLM_TIMEOUT=15s
-micasa   # waits up to 15s for LLM server responses
+export MICASA_LLM_TIMEOUT=10m
+micasa   # waits up to 10m for LLM responses
 ```
 
 ### `MICASA_DOCUMENTS_MAX_FILE_SIZE`
@@ -278,10 +279,10 @@ model = "qwen3"
 # Use this to inject domain-specific details about your house, region, etc.
 # extra_context = "My house is a 1920s craftsman in Portland, OR."
 
-# Timeout for quick LLM server operations (ping, model listing).
-# Go duration syntax: "5s", "10s", "500ms", etc. Default: "5s".
-# Increase if your LLM server is slow to respond.
-# timeout = "5s"
+# Max time for a single LLM response (including streaming).
+# Go duration syntax: "5m", "10m", etc. Default: "5m".
+# Increase for slow models or complex queries.
+# timeout = "5m"
 
 # Enable model thinking mode for chat (e.g. qwen3 <think> blocks).
 # Unset = don't send (server default), true = enable, false = disable.
@@ -302,10 +303,6 @@ model = "qwen3"
 # with small, fast models optimized for structured JSON output.
 # model = "qwen2.5:7b"
 
-# Timeout for pdftotext. Go duration syntax: "30s", "1m", etc. Default: "30s".
-# Increase if you routinely process very large PDFs.
-# text_timeout = "30s"
-
 # Maximum pages to OCR for scanned documents. 0 = no limit. Default: 0.
 # max_pages = 0
 
@@ -338,7 +335,7 @@ set in `[llm.chat]` and `[llm.extraction]`.
 | `model` | string | `qwen3` | Model identifier sent in chat requests. Must be available on the server. |
 | `api_key` | string | (empty) | Authentication credential. Required for cloud providers (Anthropic, OpenAI, etc.). Leave empty for local servers. |
 | `extra_context` | string | (empty) | Free-form text appended to all LLM system prompts. Useful for telling the model about your house or regional conventions. Currency is handled automatically via `[locale]`. |
-| `timeout` | string | `"5s"` | Max wait time for quick LLM operations (ping, model listing). Go duration syntax, e.g. `"10s"`, `"500ms"`. Increase for slow servers. |
+| `timeout` | string | `"5m"` | Max time for a single LLM response (including streaming). Go duration syntax, e.g. `"10m"`. Increase for slow models. |
 | `thinking` | bool | (unset) | Enable model thinking mode (e.g. qwen3 `<think>` blocks). Unset = don't send the option (server default). |
 
 ### `[llm.chat]` section
@@ -391,11 +388,20 @@ dates, vendor matching) from uploaded documents.
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
 | `model` | string | (chat model) | **Deprecated.** Use `[llm.extraction] model` instead. Falls back to `llm.model` if empty. |
-| `text_timeout` | string | `"30s"` | Max time for `pdftotext` to run. Go duration syntax, e.g. `"1m"`. Increase for very large PDFs. |
 | `max_pages` | int | `0` | Maximum pages to OCR per scanned document. 0 means no limit. |
-| `enabled` | bool | `true` | Set to `false` to disable LLM-powered extraction. When disabled, no structured data is extracted from documents. |
+| `enable` | bool | `true` | Set to `false` to disable LLM-powered structured extraction. OCR and pdftotext still run (see `[extraction.ocr]`). |
+| `enabled` | bool | -- | **Deprecated.** Use `enable` instead. |
 | `thinking` | bool | `false` | **Deprecated.** Use `[llm.extraction] thinking` instead. |
 
+### `[extraction.ocr]` section
+
+OCR sub-pipeline settings. Requires `tesseract` and `pdftocairo`.
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| `enable` | bool | `true` | Set to `false` to disable OCR on documents. When disabled, scanned pages and images produce no text. |
+| `confidence_threshold` | int | `0` | Minimum tesseract word confidence (0-100) to keep. Words below this threshold are dropped. 0 means no filtering. |
+
 ### `[locale]` section
 
 Locale and currency settings. Controls currency formatting across all money
 
@@ -264,14 +264,14 @@ type Extraction struct {
 	// documents. 0 means no limit (all pages). Default: 0.
 	MaxPages int `toml:"max_pages"`
 
-	// Enabled controls whether LLM-powered extraction runs when a document
-	// is uploaded. When disabled, no structured data is extracted -- OCR and
-	// pdftotext are internal pipeline steps, not standalone features. Default: true.
-	Enabled *bool `toml:"enabled,omitempty"`
+	// Enable controls whether LLM-powered structured extraction runs when
+	// a document is uploaded. When disabled, no structured data is extracted
+	// from documents. OCR and pdftotext still run independently (controlled
+	// by [extraction.ocr]) to populate the document's stored text. Default: true.
+	Enable *bool `toml:"enable,omitempty"`
 
-	// TextTimeout is the maximum time to wait for pdftotext. Go duration
-	// string, e.g. "30s", "1m". Default: "30s".
-	TextTimeout string `toml:"text_timeout"`
+	// Enabled is the deprecated spelling; migrated to Enable on load.
+	Enabled *bool `toml:"enabled,omitempty"`
 
 	// LLMTimeout is the maximum time to wait for the LLM extraction
 	// inference step. Go duration string, e.g. "5m", "90s". Default: "5m".
@@ -281,28 +281,39 @@ type Extraction struct {
 	// Supported values: none, low, medium, high, auto.
 	// Empty string = don't send (server default). Default: empty.
 	Thinking string `toml:"thinking,omitempty"`
+
+	// OCR holds settings for the OCR sub-pipeline.
+	OCR OCR `toml:"ocr" doc:"OCR sub-pipeline. Requires tesseract and pdftocairo."`
+}
+
+// OCR holds settings for the OCR sub-pipeline within extraction.
+type OCR struct {
+	// Enable controls whether OCR runs on uploaded documents.
+	// When disabled, scanned pages and images produce no text. Default: true.
+	Enable *bool `toml:"enable,omitempty"`
+
+	// ConfidenceThreshold is the minimum tesseract word confidence (0-100)
+	// to keep in OCR output. Words below this threshold are dropped.
+	// 0 means no filtering (all words kept). Default: 0.
+	ConfidenceThreshold int `toml:"confidence_threshold"`
 }
 
 // IsEnabled returns whether LLM extraction is enabled. Defaults to true
 // when the field is unset.
 func (e Extraction) IsEnabled() bool {
-	if e.Enabled != nil {
-		return *e.Enabled
+	if e.Enable != nil {
+		return *e.Enable
 	}
 	return true
 }
 
-// TextTimeoutDuration returns the parsed text extraction timeout, falling
-// back to DefaultTextTimeout if the value is empty or unparseable.
-func (e Extraction) TextTimeoutDuration() time.Duration {
-	if e.TextTimeout == "" {
-		return DefaultTextTimeout
-	}
-	d, err := time.ParseDuration(e.TextTimeout)
-	if err != nil {
-		return DefaultTextTimeout
+// IsOCREnabled returns whether OCR is enabled. Defaults to true when
+// the field is unset.
+func (e Extraction) IsOCREnabled() bool {
+	if e.OCR.Enable != nil {
+		return *e.OCR.Enable
 	}
-	return d
+	return true
 }
 
 // LLMTimeoutDuration returns the parsed LLM extraction timeout, falling
@@ -341,7 +352,6 @@ const (
 	DefaultLLMExtractionTimeout = DefaultLLMTimeout
 	DefaultCacheTTL             = 30 * 24 * time.Hour // 30 days
 	DefaultMaxPages             = 0
-	DefaultTextTimeout          = 30 * time.Second
 	configRelPath               = "micasa/config.toml"
 )
 
@@ -378,6 +388,15 @@ func LoadFromPath(path string) (Config, error) {
 		return cfg, err
 	}
 
+	// Migrate and clear deprecated Enabled again: applyEnvOverrides may
+	// have repopulated it from MICASA_EXTRACTION_ENABLED.
+	if cfg.Extraction.Enabled != nil {
+		if cfg.Extraction.Enable == nil {
+			cfg.Extraction.Enable = cfg.Extraction.Enabled
+		}
+		cfg.Extraction.Enabled = nil
+	}
+
 	// Normalize base URLs: strip trailing slash and /v1 suffix --
 	// providers handle their own path construction.
 	cfg.LLM.BaseURL = normalizeBaseURL(cfg.LLM.BaseURL)
@@ -488,22 +507,6 @@ func LoadFromPath(path string) (Config, error) {
 		)
 	}
 
-	if cfg.Extraction.TextTimeout != "" {
-		d, err := time.ParseDuration(cfg.Extraction.TextTimeout)
-		if err != nil {
-			return cfg, fmt.Errorf(
-				"extraction.text_timeout: invalid duration %q -- use Go syntax like \"30s\" or \"1m\"",
-				cfg.Extraction.TextTimeout,
-			)
-		}
-		if d <= 0 {
-			return cfg, fmt.Errorf(
-				"extraction.text_timeout must be positive, got %s",
-				cfg.Extraction.TextTimeout,
-			)
-		}
-	}
-
 	if cfg.Extraction.LLMTimeout != "" {
 		d, err := time.ParseDuration(cfg.Extraction.LLMTimeout)
 		if err != nil {
@@ -527,6 +530,13 @@ func LoadFromPath(path string) (Config, error) {
 		)
 	}
 
+	if cfg.Extraction.OCR.ConfidenceThreshold < 0 || cfg.Extraction.OCR.ConfidenceThreshold > 100 {
+		return cfg, fmt.Errorf(
+			"extraction.ocr.confidence_threshold must be 0-100, got %d",
+			cfg.Extraction.OCR.ConfidenceThreshold,
+		)
+	}
+
 	checkFilePermissions(&cfg, path)
 
 	return cfg, nil
@@ -905,6 +915,17 @@ func migrateRenamedKeys(cfg *Config, md toml.MetaData, path string) {
 		)
 	}
 
+	// extraction.enabled -> extraction.enable (v1.78)
+	if md.IsDefined("extraction", "enabled") {
+		if !md.IsDefined("extraction", "enable") {
+			cfg.Extraction.Enable = cfg.Extraction.Enabled
+		}
+		cfg.Warnings = append(cfg.Warnings,
+			"extraction.enabled is deprecated -- use extraction.enable instead",
+		)
+	}
+	cfg.Extraction.Enabled = nil // never propagate the deprecated field
+
 	// extraction.model -> llm.extraction.model (v1.59)
 	if md.IsDefined("extraction", "model") && !md.IsDefined("llm", "extraction", "model") {
 		cfg.LLM.Extraction.Model = cfg.Extraction.Model
@@ -926,6 +947,9 @@ func migrateRenamedKeys(cfg *Config, md toml.MetaData, path string) {
 // replacements. Processed newest-first so that the most recent intermediate
 // name wins when multiple generations of the same variable are set.
 var envRenames = []struct{ old, canonical string }{
+	// v1.78: extraction.enabled -> extraction.enable.
+	{"MICASA_EXTRACTION_ENABLED", "MICASA_EXTRACTION_ENABLE"},
+
 	// v1.77: env var names now derived from dotted config paths.
 	{"MICASA_CURRENCY", "MICASA_LOCALE_CURRENCY"},
 	{"MICASA_MAX_DOCUMENT_SIZE", "MICASA_DOCUMENTS_MAX_FILE_SIZE"},
@@ -934,7 +958,6 @@ var envRenames = []struct{ old, canonical string }{
 	{"MICASA_FILE_PICKER_DIR", "MICASA_DOCUMENTS_FILE_PICKER_DIR"},
 	{"MICASA_EXTRACTION_MAX_EXTRACT_PAGES", "MICASA_EXTRACTION_MAX_PAGES"},
 	{"MICASA_MAX_EXTRACT_PAGES", "MICASA_EXTRACTION_MAX_PAGES"},
-	{"MICASA_TEXT_TIMEOUT", "MICASA_EXTRACTION_TEXT_TIMEOUT"},
 
 	// v1.59
 	{"MICASA_EXTRACTION_MODEL", "MICASA_LLM_EXTRACTION_MODEL"},
@@ -1102,9 +1125,9 @@ model = "` + DefaultModel + `"
 # file_picker_dir = "/home/user/Documents"
 
 [extraction]
-# Timeout for pdftotext. Go duration syntax: "30s", "1m", etc. Default: "30s".
-# Increase if you routinely process very large PDFs.
-# text_timeout = "30s"
+# Set to false to disable LLM-powered structured extraction. OCR and pdftotext
+# still run (see [extraction.ocr]) to populate document text for search/display.
+# enable = true
 
 # Timeout for LLM extraction inference. Go duration syntax: "5m", "90s", etc.
 # Default: "5m". Increase for slow local models or complex documents.
@@ -1113,9 +1136,14 @@ model = "` + DefaultModel + `"
 # Maximum pages for async extraction of scanned documents. 0 = no limit. Default: 0.
 # max_pages = 0
 
-# Set to false to disable LLM-powered extraction even when LLM is configured.
-# When disabled, no structured data is extracted from documents.
-# enabled = true
+# [extraction.ocr]
+# Set to false to disable OCR on uploaded documents. When disabled, scanned
+# pages and images produce no text. Default: true.
+# enable = true
+
+# Minimum tesseract word confidence (0-100) to keep. Words below this
+# threshold are dropped. 0 = no filtering. Default: 0.
+# confidence_threshold = 0
 
 [locale]
 # ISO 4217 currency code. Stored in the database on first run; after that the