Skip to content

Commit ce67562

Browse files
author
Your Name
committed
fix: address review feedback on retry behavior and None guard
- Restore explicit Exception on max retries instead of a silent warning
- Move truncation logic before the retry loop so it runs only once, on the initial incomplete response, not on every iteration
- Add explicit None guard for physical_index before passing it to convert_physical_index_to_int to prevent a potential TypeError
- Update test to expect Exception on max retries
1 parent 4464871 commit ce67562

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

pageindex/page_index.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def extract_toc_content(content, model=None):
186186
if if_complete == "yes" and finish_reason == "finished":
187187
break
188188
else:
189-
logging.warning('extract_toc_content: max retries reached, returning best effort result')
189+
raise Exception('Failed to complete table of contents extraction after maximum retries')
190190

191191
return response
192192

@@ -297,11 +297,12 @@ def toc_transformer(toc_content, model=None):
297297
]
298298
continue_prompt = "Please continue the table of contents JSON structure from where you left off. Directly output only the remaining part."
299299

300+
position = last_complete.rfind('}')
301+
if position != -1:
302+
last_complete = last_complete[:position+2]
303+
300304
max_attempts = 5
301305
for attempt in range(max_attempts):
302-
position = last_complete.rfind('}')
303-
if position != -1:
304-
last_complete = last_complete[:position+2]
305306

306307
new_complete, finish_reason = llm_completion(model=model, prompt=continue_prompt, chat_history=chat_history, return_finish_reason=True)
307308

@@ -316,7 +317,7 @@ def toc_transformer(toc_content, model=None):
316317
if if_complete == "yes" and finish_reason == "finished":
317318
break
318319
else:
319-
logging.warning('toc_transformer: max retries reached, returning best effort result')
320+
raise Exception('Failed to complete TOC transformation after maximum retries')
320321

321322
last_complete = extract_json(last_complete)
322323

@@ -741,7 +742,10 @@ async def single_toc_item_index_fixer(section_title, content, model=None):
741742
prompt = toc_extractor_prompt + '\nSection Title:\n' + str(section_title) + '\nDocument pages:\n' + content
742743
response = await llm_acompletion(model=model, prompt=prompt)
743744
json_content = extract_json(response)
744-
return convert_physical_index_to_int(json_content.get('physical_index'))
745+
physical_index = json_content.get('physical_index')
746+
if physical_index is None:
747+
return None
748+
return convert_physical_index_to_int(physical_index)
745749

746750

747751

tests/test_issue_163.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@ def test_continues_on_incomplete(self, mock_llm, mock_check):
8181

8282
@patch("pageindex.page_index.check_if_toc_transformation_is_complete")
8383
@patch("pageindex.page_index.llm_completion")
84-
def test_max_retries_returns_best_effort(self, mock_llm, mock_check):
84+
def test_max_retries_raises_exception(self, mock_llm, mock_check):
8585
mock_llm.return_value = ("chunk", "max_output_reached")
8686
mock_check.return_value = "no"
87-
result = extract_toc_content("raw content", model="test")
88-
assert "chunk" in result
87+
with pytest.raises(Exception, match="Failed to complete table of contents extraction"):
88+
extract_toc_content("raw content", model="test")
8989
assert mock_llm.call_count == 6
9090

9191
@patch("pageindex.page_index.check_if_toc_transformation_is_complete")

0 commit comments

Comments
 (0)