Skip to content

Commit a77ebda

Browse files
[formrecognizer] handle unsupervised pages better with service bug (#11017)
* handle unsupervised pages better
* python 2 oops
1 parent 269715f commit a77ebda

2 files changed

Lines changed: 21 additions & 18 deletions

File tree

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -15,20 +15,23 @@
1515
def get_elements(field, read_result):
1616
text_elements = []
1717

18-
for item in field.elements:
19-
nums = [int(s) for s in re.findall(r"\d+", item)]
20-
read = nums[0]
21-
line = nums[1]
22-
if len(nums) == 3:
23-
word = nums[2]
24-
ocr_word = read_result[read].lines[line].words[word]
25-
extracted_word = FormWord._from_generated(ocr_word, page=read + 1)
26-
text_elements.append(extracted_word)
27-
continue
28-
ocr_line = read_result[read].lines[line]
29-
extracted_line = FormLine._from_generated(ocr_line, page=read + 1)
30-
text_elements.append(extracted_line)
31-
return text_elements
18+
try:
19+
for item in field.elements:
20+
nums = [int(s) for s in re.findall(r"\d+", item)]
21+
read = nums[0]
22+
line = nums[1]
23+
if len(nums) == 3:
24+
word = nums[2]
25+
ocr_word = read_result[read].lines[line].words[word]
26+
extracted_word = FormWord._from_generated(ocr_word, page=read + 1)
27+
text_elements.append(extracted_word)
28+
continue
29+
ocr_line = read_result[read].lines[line]
30+
extracted_line = FormLine._from_generated(ocr_line, page=read + 1)
31+
text_elements.append(extracted_line)
32+
return text_elements
33+
except IndexError:
34+
return None # https://github.com/Azure/azure-sdk-for-python/issues/11014
3235

3336

3437
def get_field_value(field, value, read_result): # pylint: disable=too-many-return-statements

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_response_handlers.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -99,15 +99,15 @@ def prepare_content_result(response):
9999
read_result = response.analyze_result.read_results
100100
page_result = response.analyze_result.page_results
101101

102-
for page in read_result:
102+
for idx, page in enumerate(read_result):
103103
form_page = FormPage(
104104
page_number=page.page,
105105
text_angle=page.angle,
106106
width=page.width,
107107
height=page.height,
108108
unit=page.unit,
109109
lines=[FormLine._from_generated(line, page=page.page) for line in page.lines] if page.lines else None,
110-
tables=prepare_tables(page_result[page.page-1], read_result),
110+
tables=prepare_tables(page_result[idx], read_result),
111111
)
112112
pages.append(form_page)
113113
return pages
@@ -126,7 +126,7 @@ def prepare_unlabeled_result(response):
126126
read_result = response.analyze_result.read_results
127127
page_result = response.analyze_result.page_results
128128

129-
for page in page_result:
129+
for index, page in enumerate(page_result):
130130
unlabeled_fields = [FormField._from_generated_unlabeled(field, idx, page.page, read_result)
131131
for idx, field in enumerate(page.key_value_pairs)] if page.key_value_pairs else None
132132
if unlabeled_fields:
@@ -138,7 +138,7 @@ def prepare_unlabeled_result(response):
138138
),
139139
fields=unlabeled_fields,
140140
form_type="form-" + str(page.cluster_id) if page.cluster_id is not None else None,
141-
pages=[form_pages[page.page-1]]
141+
pages=[form_pages[index]]
142142
)
143143
result.append(form)
144144

0 commit comments

Comments
 (0)