Skip to content

Commit 8f7cb55

Browse files
cpcloud and claude authored
feat(extract): allow accepting extraction results without LLM (#721)
## Summary

- Remove the `HasError` gate on the extraction overlay's accept action so `a` works regardless of whether the LLM step ran, failed, or was skipped.
- Extracted text (pdftotext/OCR) can now be persisted without requiring LLM analysis to succeed.
- Fix a bug where `acceptDeferredExtraction` did not call `reloadAfterMutation` when there were no non-document operations, causing accepted deferred documents to not appear in the table.

Closes #710

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent bb8a9af commit 8f7cb55

2 files changed

Lines changed: 118 additions & 12 deletions

File tree

internal/app/extraction.go

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -813,20 +813,19 @@ func applyStringField(data map[string]any, key string, dst *string) {
813813
}
814814

815815
// acceptExtraction persists all pending results and closes the overlay.
816+
// Works regardless of whether LLM ran, failed, or was skipped.
816817
func (m *Model) acceptExtraction() {
817818
ex := m.ex.extraction
818819
if ex == nil || !ex.Done || ex.accepted {
819820
return
820821
}
821822

822823
if ex.pendingDoc != nil {
823-
// Deferred creation (magic-add): create document now.
824824
if err := m.acceptDeferredExtraction(); err != nil {
825825
m.setStatusError(err.Error())
826826
return
827827
}
828828
} else {
829-
// Existing document: persist extraction results and dispatch ops.
830829
if err := m.acceptExistingExtraction(); err != nil {
831830
m.setStatusError(err.Error())
832831
return
@@ -879,6 +878,7 @@ func (m *Model) acceptDeferredExtraction() error {
879878
if err := m.commitShadowOperations(ex, nonDocOps); err != nil {
880879
return fmt.Errorf("dispatch operations: %w", err)
881880
}
881+
m.reloadAfterMutation()
882882
return nil
883883
}
884884

@@ -1075,7 +1075,7 @@ func (m *Model) handleExtractionPipelineKey(msg tea.KeyMsg) tea.Cmd {
10751075
return m.activateExtractionModelPicker()
10761076
}
10771077
case keyA:
1078-
if ex.Done && !ex.HasError {
1078+
if ex.Done {
10791079
m.acceptExtraction()
10801080
}
10811081
case keyX:
@@ -1264,7 +1264,7 @@ func (m *Model) handleExtractionExploreKey(msg tea.KeyMsg) tea.Cmd {
12641264
ex.previewCol = len(g.specs) - 1
12651265
}
12661266
case keyA:
1267-
if ex.Done && !ex.HasError {
1267+
if ex.Done {
12681268
m.acceptExtraction()
12691269
}
12701270
case keyX:
@@ -1495,10 +1495,7 @@ func (m *Model) buildExtractionPipelineOverlay(
14951495
if len(ex.previewGroups) > 1 {
14961496
hints = append(hints, m.helpItem(keyB+"/"+keyF, "tabs"))
14971497
}
1498-
if !ex.HasError {
1499-
hints = append(hints, m.helpItem(keyA, "accept"))
1500-
}
1501-
hints = append(hints, m.helpItem(keyX, "back"), m.helpItem(keyEsc, "discard"))
1498+
hints = append(hints, m.helpItem(keyA, "accept"), m.helpItem(keyX, "back"), m.helpItem(keyEsc, "discard"))
15021499
} else {
15031500
hints = append(hints, m.helpItem(keyJ+"/"+keyK, "navigate"))
15041501
cursorStatus := ex.Steps[ex.cursorStep()].Status
@@ -1509,10 +1506,7 @@ func (m *Model) buildExtractionPipelineOverlay(
15091506
hints = append(hints, m.helpItem(keyX, "explore"))
15101507
}
15111508
if ex.Done {
1512-
if !ex.HasError {
1513-
hints = append(hints, m.helpItem(keyA, "accept"))
1514-
}
1515-
hints = append(hints, m.helpItem(keyEsc, "discard"))
1509+
hints = append(hints, m.helpItem(keyA, "accept"), m.helpItem(keyEsc, "discard"))
15161510
} else {
15171511
hints = append(hints,
15181512
m.helpItem(symCtrlC, "int"),

internal/app/extraction_test.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2719,3 +2719,115 @@ func TestExtractionExtractFails_PingSkipPreventsLLM(t *testing.T) {
27192719
assert.Equal(t, stepSkipped, ex.Steps[stepLLM].Status)
27202720
assert.True(t, ex.Done)
27212721
}
2722+
2723+
// --- Accept works with or without LLM ---
2724+
2725+
// TestAccept_WorksWhenLLMFailed checks the accept key on an existing
// document when the LLM step is marked failed. The extraction is flagged
// Done with HasError set and carries pendingText; after sending "a" the
// overlay state must be cleared and the stored document's ExtractedText
// must equal the pending text.
func TestAccept_WorksWhenLLMFailed(t *testing.T) {
2726+
t.Parallel()
2727+
m := newExtractionModel(t, map[extractionStep]stepStatus{
2728+
stepText: stepDone,
2729+
stepLLM: stepFailed,
2730+
})
2731+
ex := m.ex.extraction
2732+
ex.Done = true
2733+
ex.HasError = true
2734+
ex.pendingText = "ocr text"
2735+
2736+
doc := &data.Document{
2737+
FileName: "invoice.pdf",
2738+
MIMEType: "application/pdf",
2739+
Data: []byte("pdf-bytes"),
2740+
}
2741+
require.NoError(t, m.store.CreateDocument(doc))
2742+
ex.DocID = doc.ID
2743+
2744+
sendExtractionKey(m, "a")
2745+
2746+
assert.Nil(t, m.ex.extraction, "accept should work even when LLM failed")
2747+
2748+
saved, err := m.store.GetDocument(doc.ID)
2749+
require.NoError(t, err)
2750+
assert.Equal(t, "ocr text", saved.ExtractedText)
2751+
}
2752+
2753+
// TestAccept_WorksWithoutLLMStep checks the accept key on an existing
// document when the step map contains only stepText and stepExtract (no
// stepLLM entry) and HasError is left unset. After sending "a" the overlay
// state must be cleared and the stored document's ExtractedText must equal
// the pending text.
func TestAccept_WorksWithoutLLMStep(t *testing.T) {
2754+
t.Parallel()
2755+
m := newExtractionModel(t, map[extractionStep]stepStatus{
2756+
stepText: stepDone,
2757+
stepExtract: stepDone,
2758+
})
2759+
ex := m.ex.extraction
2760+
ex.Done = true
2761+
ex.pendingText = "ocr text"
2762+
2763+
doc := &data.Document{
2764+
FileName: "scan.pdf",
2765+
MIMEType: "application/pdf",
2766+
Data: []byte("pdf-bytes"),
2767+
}
2768+
require.NoError(t, m.store.CreateDocument(doc))
2769+
ex.DocID = doc.ID
2770+
2771+
sendExtractionKey(m, "a")
2772+
2773+
assert.Nil(t, m.ex.extraction, "accept should work without LLM step")
2774+
2775+
saved, err := m.store.GetDocument(doc.ID)
2776+
require.NoError(t, err)
2777+
assert.Equal(t, "ocr text", saved.ExtractedText)
2778+
}
2780+
// TestAccept_DeferredDoc_WorksWhenLLMFailed checks the accept key when the
// extraction holds a pendingDoc that has not been stored yet and the LLM
// step is marked failed (Done and HasError both set). After sending "a" the
// overlay state must be cleared and the store must list exactly one
// document named "invoice.pdf".
//
// NOTE(review): unlike the sibling tests, this one does not assert the
// persisted ExtractedText; it only checks that the document was created.
func TestAccept_DeferredDoc_WorksWhenLLMFailed(t *testing.T) {
2781+
t.Parallel()
2782+
m := newExtractionModel(t, map[extractionStep]stepStatus{
2783+
stepText: stepDone,
2784+
stepLLM: stepFailed,
2785+
})
2786+
ex := m.ex.extraction
2787+
ex.Done = true
2788+
ex.HasError = true
2789+
ex.extractedText = "invoice text"
2790+
ex.pendingDoc = &data.Document{
2791+
FileName: "invoice.pdf",
2792+
MIMEType: "application/pdf",
2793+
Data: []byte("pdf-bytes"),
2794+
ExtractedText: "invoice text",
2795+
}
2796+
2797+
sendExtractionKey(m, "a")
2798+
2799+
assert.Nil(t, m.ex.extraction, "accept should work for deferred doc when LLM failed")
2800+
2801+
docs, err := m.store.ListDocuments(false)
2802+
require.NoError(t, err)
2803+
require.Len(t, docs, 1)
2804+
assert.Equal(t, "invoice.pdf", docs[0].FileName)
2805+
}
2807+
// TestAccept_DeferredDoc_WorksWithoutLLMStep checks the accept key when the
// extraction holds a pendingDoc with no ExtractedText of its own, the step
// map has no stepLLM entry, and pendingText is set. After sending "a" the
// overlay state must be cleared, the store must list exactly one document,
// and that document fetched by ID must carry the pending text as its
// ExtractedText.
func TestAccept_DeferredDoc_WorksWithoutLLMStep(t *testing.T) {
2808+
t.Parallel()
2809+
m := newExtractionModel(t, map[extractionStep]stepStatus{
2810+
stepText: stepDone,
2811+
stepExtract: stepDone,
2812+
})
2813+
ex := m.ex.extraction
2814+
ex.Done = true
2815+
ex.pendingText = "better ocr text"
2816+
ex.pendingDoc = &data.Document{
2817+
FileName: "scan.pdf",
2818+
MIMEType: "application/pdf",
2819+
Data: []byte("pdf-bytes"),
2820+
}
2821+
2822+
sendExtractionKey(m, "a")
2823+
2824+
assert.Nil(t, m.ex.extraction, "accept should work for deferred doc without LLM")
2825+
2826+
docs, err := m.store.ListDocuments(false)
2827+
require.NoError(t, err)
2828+
require.Len(t, docs, 1)
2829+
2830+
full, err := m.store.GetDocument(docs[0].ID)
2831+
require.NoError(t, err)
2832+
assert.Equal(t, "better ocr text", full.ExtractedText)
2833+
}

0 commit comments

Comments
 (0)