-
Notifications
You must be signed in to change notification settings - Fork 126
Expand file tree
/
Copy pathrun_hy_ocr.py
More file actions
119 lines (99 loc) · 3.21 KB
/
run_hy_ocr.py
File metadata and controls
119 lines (99 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import json
import base64
from openai import OpenAI
from tqdm import tqdm
from typing import Dict, List
def encode_image(image_path: str) -> str:
    """Read an image file and return its contents as a base64 string.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        Base64 representation of the raw file bytes, as UTF-8 text.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
def create_chat_messages(image_path: str, prompt: str) -> List[Dict]:
    """Build an OpenAI-style chat payload pairing an image with a text prompt.

    Args:
        image_path: Path to the image file to embed as a base64 data URL.
        prompt: Text prompt sent alongside the image.

    Returns:
        List of message dictionaries: an empty system message followed by a
        user message carrying the encoded image and then the prompt text.
    """
    data_url = f"data:image/jpeg;base64,{encode_image(image_path)}"
    user_content = [
        {"type": "image_url", "image_url": {"url": data_url}},
        {"type": "text", "text": prompt},
    ]
    return [
        {"role": "system", "content": ""},
        {"role": "user", "content": user_content},
    ]
def process_single_item(client: OpenAI, data: Dict) -> Dict:
    """Run one dataset record through the OCR model and attach its answer.

    Args:
        client: Configured OpenAI-compatible client pointed at the VLLM server.
        data: Record containing at least 'image_path' and 'question' keys.

    Returns:
        The same dictionary, with the model output stored under 'vllm_answer'.
    """
    # Build the multimodal chat payload from the record's image and question.
    messages = create_chat_messages(data['image_path'], data['question'])

    # Deterministic decoding: temperature 0, fixed seed, greedy top_k.
    completion = client.chat.completions.create(
        model="tencent/HunyuanOCR",
        messages=messages,
        temperature=0.0,
        top_p=0.95,
        seed=1234,
        stream=False,
        extra_body={
            "top_k": 1,
            "repetition_penalty": 1.0
        }
    )

    data["vllm_answer"] = completion.choices[0].message.content
    return data
def main():
    """Stream a JSONL benchmark file through the VLLM API, saving answers."""
    # Client for a locally hosted, OpenAI-compatible inference server.
    client = OpenAI(
        api_key="EMPTY",
        base_url="http://localhost:8000/v1",
        timeout=3600
    )

    input_path = 'ominidoc_bench.jsonl'
    output_path = "infer_result_ominidoc_bench.jsonl"

    with open(input_path, "r", encoding="utf-8") as fin, \
         open(output_path, "w", encoding="utf-8") as fout:
        for raw_line in tqdm(fin, desc="Processing documents"):
            # Skip blank lines between records.
            if not raw_line.strip():
                continue
            try:
                record = json.loads(raw_line)
                result = process_single_item(client, record)
                fout.write(json.dumps(result, ensure_ascii=False) + "\n")
            except Exception as e:
                # Best-effort batch run: report the failure and keep going.
                print(f"Error processing line: {str(e)}")
                continue

    print(f"Processing completed. Results saved to: {output_path}")
if __name__ == "__main__":
    main()