Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions docs/content/docs/reference/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ You can always infer the env var name from the config key.
| `MICASA_EXTRACTION_OCR_TSV_CONFIDENCE_THRESHOLD` | `70` | `extraction.ocr.tsv.confidence_threshold` | OCR confidence threshold (0-100) |
| `MICASA_DOCUMENTS_MAX_FILE_SIZE` | `50 MiB` | `documents.max_file_size` | Max document import size |
| `MICASA_DOCUMENTS_CACHE_TTL` | `30d` | `documents.cache_ttl` | Document cache lifetime |
| `MICASA_DOCUMENTS_CACHE_TTL_DAYS` | -- | `documents.cache_ttl_days` | Deprecated; use `MICASA_DOCUMENTS_CACHE_TTL` |
| `MICASA_DOCUMENTS_FILE_PICKER_DIR` | (Downloads) | `documents.file_picker_dir` | Starting directory for the file picker |
| `MICASA_LOCALE_CURRENCY` | (auto-detect) | `locale.currency` | ISO 4217 currency code (e.g. `USD`, `EUR`, `GBP`) |

Expand Down Expand Up @@ -318,7 +317,6 @@ Document attachment limits and caching.
|-----|------|---------|-------------|
| `max_file_size` | string or integer | `"50 MiB"` | Maximum file size for document imports. Accepts unitized strings (`"50 MiB"`, `"1.5 GiB"`) or bare integers (bytes). Must be positive. |
| `cache_ttl` | string or integer | `"30d"` | Cache lifetime for extracted documents. Accepts `"30d"`, `"720h"`, or bare integers (seconds). Set to `"0s"` to disable eviction. |
| `cache_ttl_days` | integer | -- | Deprecated. Use `cache_ttl` instead. Bare integer interpreted as days. Cannot be set alongside `cache_ttl`. |

### `[extraction]` section

Expand Down
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
inherit version;
src = ./.;
subPackages = [ "cmd/micasa" ];
vendorHash = "sha256-x1Ar5Dnbpey4eRknnTvZkkRnSt9yz1/Crx1ksVHtPfs=";
vendorHash = "sha256-qvThE9Ri2El41LZC4GRHJ17aIdkEhtzjTDDgv57t5z8=";
env.CGO_ENABLED = 0;
preCheck = ''
export HOME="$(mktemp -d)"
Expand Down
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ require (
github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
github.com/charmbracelet/x/ansi v0.11.6
github.com/dustin/go-humanize v1.0.1
github.com/go-playground/validator/v10 v10.20.0
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/iancoleman/strcase v0.3.0
github.com/itchyny/gojq v0.12.18
Expand Down Expand Up @@ -57,8 +58,11 @@ require (
github.com/dlclark/regexp2 v1.11.5 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/s2a-go v0.1.9 // indirect
github.com/google/uuid v1.6.0 // indirect
Expand All @@ -69,6 +73,7 @@ require (
github.com/itchyny/timefmt-go v0.1.7 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
github.com/jinzhu/now v1.1.5 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/mailru/easyjson v0.9.1 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
Expand Down
12 changes: 12 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,21 @@ github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
Expand Down Expand Up @@ -134,6 +144,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/lrstanley/bubblezone v1.0.0 h1:bIpUaBilD42rAQwlg/4u5aTqVAt6DSRKYZuSdmkr8UA=
github.com/lrstanley/bubblezone v1.0.0/go.mod h1:kcTekA8HE/0Ll2bWzqHlhA2c513KDNLW7uDfDP4Mly8=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
Expand Down
146 changes: 20 additions & 126 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ type ChatLLM struct {
// ollama, anthropic, openai, openrouter, deepseek, gemini, groq,
// mistral, llamacpp, llamafile. Auto-detected from base_url and
// api_key when empty.
Provider string `toml:"provider"`
Provider string `toml:"provider" validate:"provider"`

// BaseURL is the base URL for the provider's API.
// No /v1 suffix needed -- the provider handles path construction.
Expand All @@ -81,11 +81,11 @@ type ChatLLM struct {

// Timeout is the inference timeout for LLM responses (including
// streaming). Go duration string, e.g. "5m", "10m". Default: "5m".
Timeout string `toml:"timeout" default:"5m"`
Timeout string `toml:"timeout" default:"5m" validate:"omitempty,positive_duration"`

// Thinking controls the model's reasoning effort level.
// Supported: none, low, medium, high, auto. Empty = server default.
Thinking string `toml:"thinking,omitempty"`
Thinking string `toml:"thinking,omitempty" validate:"omitempty,oneof=none low medium high auto"`

// ExtraContext is custom text appended to chat system prompts.
// Useful for domain-specific details: house style, location, etc.
Expand All @@ -102,7 +102,7 @@ func (l ChatLLM) TimeoutDuration() time.Duration {
type Extraction struct {
// MaxPages is the maximum number of pages for async extraction of
// scanned documents. 0 means no limit. Default: 0.
MaxPages int `toml:"max_pages"`
MaxPages int `toml:"max_pages" validate:"min=0"`

// LLM holds the LLM connection settings for the extraction pipeline.
LLM ExtractionLLM `toml:"llm" doc:"LLM connection settings for extraction."`
Expand All @@ -121,7 +121,7 @@ type ExtractionLLM struct {

// Provider selects which LLM provider to use. See ChatLLM.Provider
// for supported values. Auto-detected when empty.
Provider string `toml:"provider"`
Provider string `toml:"provider" validate:"provider"`

// BaseURL is the base URL for the provider's API.
BaseURL string `toml:"base_url" default:"http://localhost:11434"`
Expand All @@ -134,11 +134,11 @@ type ExtractionLLM struct {
APIKey string `toml:"api_key"` //nolint:gosec // config field, not a hardcoded credential

// Timeout is the inference timeout for extraction LLM responses.
Timeout string `toml:"timeout" default:"5m"`
Timeout string `toml:"timeout" default:"5m" validate:"omitempty,positive_duration"`

// Thinking controls the model's reasoning effort level.
// Supported: none, low, medium, high, auto. Empty = server default.
Thinking string `toml:"thinking,omitempty"`
Thinking string `toml:"thinking,omitempty" validate:"omitempty,oneof=none low medium high auto"`
}

// IsEnabled returns whether LLM extraction is enabled. Defaults to true.
Expand Down Expand Up @@ -186,7 +186,7 @@ type OCRTSV struct {
// OCR confidence annotations are included in spatial layout output.
// Lines with min confidence >= this value omit the score to save
// tokens. Set to 0 to never show confidence. Default: 70.
ConfidenceThreshold *int `toml:"confidence_threshold,omitempty"`
ConfidenceThreshold *int `toml:"confidence_threshold,omitempty" validate:"omitempty,min=0,max=100"`
}

// IsEnabled returns whether TSV spatial annotations are enabled.
Expand Down Expand Up @@ -230,15 +230,12 @@ type Documents struct {
// MaxFileSize is the largest file that can be imported as a document
// attachment. Accepts unitized strings ("50 MiB") or bare integers
// (bytes). Default: 50 MiB.
MaxFileSize ByteSize `toml:"max_file_size" default:"52428800"`
MaxFileSize ByteSize `toml:"max_file_size" default:"52428800" validate:"required"`

// CacheTTL is the preferred cache lifetime setting. Accepts unitized
// strings ("30d", "720h") or bare integers (seconds). Default: 30d.
CacheTTL *Duration `toml:"cache_ttl,omitempty"`

// CacheTTLDays is deprecated; use CacheTTL instead. Kept for backward
// compatibility. Bare integer interpreted as days.
CacheTTLDays *int `toml:"cache_ttl_days,omitempty"`
// CacheTTL is the cache lifetime for extracted documents. Accepts
// unitized strings ("30d", "720h") or bare integers (seconds).
// Set to "0s" to disable eviction. Default: 30d.
CacheTTL *Duration `toml:"cache_ttl,omitempty" validate:"omitempty,nonneg_duration"`
Comment thread
cpcloud marked this conversation as resolved.

// FilePickerDir is the starting directory for the document file picker.
// Default: the system Downloads folder (e.g. ~/Downloads).
Expand Down Expand Up @@ -266,14 +263,11 @@ func (d Documents) ResolvedFilePickerDir() string {
}

// CacheTTLDuration returns the resolved cache TTL as a time.Duration.
// CacheTTL takes precedence over CacheTTLDays. Returns 0 to disable.
// Returns 0 to disable eviction.
func (d Documents) CacheTTLDuration() time.Duration {
if d.CacheTTL != nil {
return d.CacheTTL.Duration
}
if d.CacheTTLDays != nil {
return time.Duration(*d.CacheTTLDays) * 24 * time.Hour
}
return DefaultCacheTTL
}

Expand Down Expand Up @@ -307,9 +301,13 @@ func LoadFromPath(path string) (Config, error) {
data.ApplyDefaults(&cfg)

if _, err := os.Stat(path); err == nil {
if _, err := toml.DecodeFile(path, &cfg); err != nil {
md, err := toml.DecodeFile(path, &cfg)
if err != nil {
return cfg, fmt.Errorf("parse %s: %w", path, err)
}
if err := checkRemovedKeys(md); err != nil {
return cfg, err
}
}

if err := applyEnvOverrides(&cfg, nil); err != nil {
Expand All @@ -332,109 +330,13 @@ func LoadFromPath(path string) (Config, error) {
)
}

// Validate providers.
if !validProvider(cfg.Chat.LLM.Provider) {
return cfg, fmt.Errorf(
"chat.llm.provider: unknown provider %q -- supported: %s",
cfg.Chat.LLM.Provider, strings.Join(providerNames(), ", "),
)
}
if !validProvider(cfg.Extraction.LLM.Provider) {
return cfg, fmt.Errorf(
"extraction.llm.provider: unknown provider %q -- supported: %s",
cfg.Extraction.LLM.Provider, strings.Join(providerNames(), ", "),
)
}

// Validate thinking levels.
if cfg.Chat.LLM.Thinking != "" && !validThinkingLevel(cfg.Chat.LLM.Thinking) {
return cfg, fmt.Errorf(
"chat.llm.thinking: invalid level %q -- supported: none, low, medium, high, auto",
cfg.Chat.LLM.Thinking,
)
}
if cfg.Extraction.LLM.Thinking != "" && !validThinkingLevel(cfg.Extraction.LLM.Thinking) {
return cfg, fmt.Errorf(
"extraction.llm.thinking: invalid level %q -- supported: none, low, medium, high, auto",
cfg.Extraction.LLM.Thinking,
)
}

// Validate timeouts.
if err := validateTimeout(cfg.Chat.LLM.Timeout, "chat.llm"); err != nil {
return cfg, err
}
if err := validateTimeout(cfg.Extraction.LLM.Timeout, "extraction.llm"); err != nil {
if err := cfg.validate(path); err != nil {
return cfg, err
}

if cfg.Documents.MaxFileSize == 0 {
return cfg, fmt.Errorf("documents.max_file_size must be positive")
}

if cfg.Documents.CacheTTL != nil && cfg.Documents.CacheTTLDays != nil {
return cfg, fmt.Errorf(
"documents.cache_ttl and documents.cache_ttl_days cannot both be set -- " +
"remove cache_ttl_days (deprecated) and use cache_ttl instead",
)
}

if cfg.Documents.CacheTTLDays != nil {
cfg.Warnings = append(
cfg.Warnings,
"documents.cache_ttl_days is deprecated -- use documents.cache_ttl (e.g. \"30d\") instead",
)
if *cfg.Documents.CacheTTLDays < 0 {
return cfg, fmt.Errorf(
"documents.cache_ttl_days must be non-negative, got %d",
*cfg.Documents.CacheTTLDays,
)
}
}

if cfg.Documents.CacheTTL != nil && cfg.Documents.CacheTTL.Duration < 0 {
return cfg, fmt.Errorf(
"documents.cache_ttl must be non-negative, got %s",
cfg.Documents.CacheTTL.Duration,
)
}

if cfg.Extraction.MaxPages < 0 {
return cfg, fmt.Errorf(
"extraction.max_pages must be non-negative, got %d",
cfg.Extraction.MaxPages,
)
}

if t := cfg.Extraction.OCR.TSV.Threshold(); t < 0 || t > 100 {
return cfg, fmt.Errorf(
"extraction.ocr.tsv.confidence_threshold must be 0-100, got %d", t,
)
}

checkFilePermissions(&cfg, path)

return cfg, nil
}

// validateTimeout checks a pipeline timeout string. An empty timeout is
// accepted; any other value must parse with time.ParseDuration and be
// strictly greater than zero. prefix is the dotted config path of the owning
// section (e.g. "chat.llm") and is used only to label the returned error.
func validateTimeout(timeout, prefix string) error {
	// Nothing to check when no timeout was supplied.
	if timeout == "" {
		return nil
	}

	parsed, parseErr := time.ParseDuration(timeout)
	switch {
	case parseErr != nil:
		// The parse error itself is not included; the message shows the
		// offending value and an example of the expected syntax instead.
		return fmt.Errorf(
			"%s.timeout: invalid duration %q -- use Go syntax like \"5m\" or \"10m\"",
			prefix, timeout,
		)
	case parsed <= 0:
		return fmt.Errorf("%s.timeout must be positive, got %s", prefix, timeout)
	default:
		return nil
	}
}

// applyEnvOverrides walks the Config struct and applies environment variable
// overrides. Env var names are derived from the dotted TOML path via
// [EnvVarName]. The extra map supplies values migrated from deprecated env
Expand Down Expand Up @@ -794,14 +696,6 @@ func validProvider(name string) bool {
return false
}

// thinkingLevels is the set of reasoning-effort level names recognised by
// validThinkingLevel.
var thinkingLevels = map[string]bool{
	"none":   true,
	"low":    true,
	"medium": true,
	"high":   true,
	"auto":   true,
}

// validThinkingLevel reports whether level is marked true in thinkingLevels.
// The comparison is an exact map lookup, so the empty string and names with
// different casing are not members.
func validThinkingLevel(level string) bool {
	known, present := thinkingLevels[level]
	return present && known
}

// detectProvider infers the provider from the base URL and API key.
func detectProvider(baseURL, apiKey string) string {
if apiKey != "" {
Expand Down
50 changes: 4 additions & 46 deletions internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -311,53 +311,13 @@ func TestCacheTTLRejectsNegative(t *testing.T) {
assert.Contains(t, err.Error(), "must be non-negative")
}

// --- CacheTTLDays (deprecated) ---

func TestCacheTTLDaysStillWorks(t *testing.T) {
func TestCacheTTLDaysRemovedReturnsError(t *testing.T) {
path := writeConfig(t, "[documents]\ncache_ttl_days = 7\n")
cfg, err := LoadFromPath(path)
require.NoError(t, err)
assert.Equal(t, 7*24*time.Hour, cfg.Documents.CacheTTLDuration())
require.Len(t, cfg.Warnings, 1)
assert.Contains(t, cfg.Warnings[0], "documents.cache_ttl_days")
}

// TestCacheTTLDaysZeroDisables loads a config file that sets
// documents.cache_ttl_days to 0 and expects the resolved cache TTL to be
// the zero duration.
func TestCacheTTLDaysZeroDisables(t *testing.T) {
	cfgPath := writeConfig(t, "[documents]\ncache_ttl_days = 0\n")

	loaded, loadErr := LoadFromPath(cfgPath)
	require.NoError(t, loadErr)

	var want time.Duration // zero value
	assert.Equal(t, want, loaded.Documents.CacheTTLDuration())
}

// TestCacheTTLDaysEnvOverride sets MICASA_DOCUMENTS_CACHE_TTL_DAYS with no
// config file present and expects a 14-day cache TTL plus exactly one
// warning that mentions documents.cache_ttl_days.
func TestCacheTTLDaysEnvOverride(t *testing.T) {
	const fourteenDays = 14 * 24 * time.Hour

	t.Setenv("MICASA_DOCUMENTS_CACHE_TTL_DAYS", "14")

	loaded, loadErr := LoadFromPath(noConfig(t))
	require.NoError(t, loadErr)
	assert.Equal(t, fourteenDays, loaded.Documents.CacheTTLDuration())

	warnings := loaded.Warnings
	require.Len(t, warnings, 1)
	assert.Contains(t, warnings[0], "documents.cache_ttl_days")
}

// TestCacheTTLDaysRejectsNegative expects loading to fail with a
// "must be non-negative" error when documents.cache_ttl_days is -1.
func TestCacheTTLDaysRejectsNegative(t *testing.T) {
	cfgPath := writeConfig(t, "[documents]\ncache_ttl_days = -1\n")

	_, loadErr := LoadFromPath(cfgPath)

	require.Error(t, loadErr)
	msg := loadErr.Error()
	assert.Contains(t, msg, "must be non-negative")
}

// TestCacheTTLAndCacheTTLDaysBothSetFails expects loading to fail with a
// "cannot both be set" error when the config file sets both
// documents.cache_ttl and documents.cache_ttl_days.
func TestCacheTTLAndCacheTTLDaysBothSetFails(t *testing.T) {
	cfgPath := writeConfig(t, "[documents]\ncache_ttl = \"30d\"\ncache_ttl_days = 30\n")

	_, loadErr := LoadFromPath(cfgPath)

	require.Error(t, loadErr)
	msg := loadErr.Error()
	assert.Contains(t, msg, "cannot both be set")
}

func TestCacheTTLAndCacheTTLDaysEnvBothSetFails(t *testing.T) {
t.Setenv("MICASA_DOCUMENTS_CACHE_TTL", "30d")
t.Setenv("MICASA_DOCUMENTS_CACHE_TTL_DAYS", "30")
_, err := LoadFromPath(noConfig(t))
require.Error(t, err)
assert.Contains(t, err.Error(), "cannot both be set")
assert.Contains(t, err.Error(), "cache_ttl_days")
assert.Contains(t, err.Error(), "removed")
assert.Contains(t, err.Error(), "cache_ttl")
}

// --- API Keys ---
Expand Down Expand Up @@ -565,7 +525,6 @@ func TestInvalidEnvVarReturnsError(t *testing.T) {
{"MICASA_EXTRACTION_LLM_ENABLE", "maybe", "expected true or false"},
{"MICASA_DOCUMENTS_MAX_FILE_SIZE", "lots", "expected byte size"},
{"MICASA_DOCUMENTS_CACHE_TTL", "forever", "expected duration"},
{"MICASA_DOCUMENTS_CACHE_TTL_DAYS", "many", "expected integer"},
}
for _, tt := range tests {
t.Run(tt.envVar, func(t *testing.T) {
Expand Down Expand Up @@ -736,7 +695,6 @@ func TestEnvVars(t *testing.T) {

"MICASA_DOCUMENTS_MAX_FILE_SIZE": "documents.max_file_size",
"MICASA_DOCUMENTS_CACHE_TTL": "documents.cache_ttl",
"MICASA_DOCUMENTS_CACHE_TTL_DAYS": "documents.cache_ttl_days",
"MICASA_DOCUMENTS_FILE_PICKER_DIR": "documents.file_picker_dir",

"MICASA_LOCALE_CURRENCY": "locale.currency",
Expand Down
Loading
Loading