Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 116 additions & 91 deletions openseek/competition/LongContext-ICL-Annotation/src/main.py
Original file line number Diff line number Diff line change
@@ -1,92 +1,117 @@
import json, os, argparse
from tqdm import tqdm, trange
from transformers import AutoTokenizer

# from method import build_prompt, select_examples, annotate

from method import build_prompt, select_examples

from method import annotate_nvidia as annotate # For Nvidia GPU
# from method import annotate_ascend as annotate # For Huawei Ascend

# Mapping from task id (1-8) to the JSON file that holds that task's
# definition, in-context-learning examples, and test samples.
TASK_FILES = {
    1: './data/openseek-1_closest_integers.json',
    2: './data/openseek-2_count_nouns_verbs.json',
    3: './data/openseek-3_collatz_conjecture.json',
    4: './data/openseek-4_conala_concat_strings.json',
    5: './data/openseek-5_semeval_2018_task1_tweet_sadness_detection.json',
    6: './data/openseek-6_mnli_same_genre_classification.json',
    7: './data/openseek-7_jeopardy_answer_generation_all.json',
    # NOTE(review): task 8 uses '../data' while every other task uses
    # './data' -- confirm whether this relative-path inconsistency is intentional.
    8: '../data/openseek-8_kernel_generation.json',
}

def parser_args():
    """Parse command-line arguments for the evaluation script.

    Returns:
        argparse.Namespace with task_id, max_input_length,
        log_path_prefix and tokenizer_path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--task_id', type=int, required=True,
                        # Fixed: help text previously said "[1, 7]" although
                        # TASK_FILES defines tasks 1 through 8.
                        help='Task ID to evaluate, should be in [1, 8].')
    parser.add_argument('--max_input_length', type=int, default=10_000,
                        help='Maximum input length for the model.')
    parser.add_argument('--log_path_prefix', type=str,
                        default='../outputs/',
                        help='Prefix path to save the evaluation logs.')
    parser.add_argument('--tokenizer_path', type=str,
                        default='/share/project/wuhaiming/spaces/data_agent/OpenSeek-main/openseek/competition/LongContext-ICL-Annotation/src/Qwen3-4B')
    args = parser.parse_args()
    return args

def evaluate(task_id: int,
             qwen_tokenizer: AutoTokenizer,
             max_input_length: int = 128_000,
             log_path_prefix: str = './outputs/'
             ) -> float:
    """Annotate every test sample of one task and log predictions to JSONL.

    Args:
        task_id: Task to evaluate; must be in [1, 8] (keys of TASK_FILES).
        qwen_tokenizer: Tokenizer instance. NOTE(review): currently unused --
            a tokenizer-based input-length guard was removed; kept for
            interface compatibility.
        max_input_length: Maximum model input length (currently unused, see
            note above).
        log_path_prefix: Directory prefix for the output JSONL log.
    """
    assert 1 <= task_id <= 8, \
        f"task_id should be in [1, 8], but got {task_id}."

    task_file = TASK_FILES[task_id]
    with open(task_file, 'r') as f:
        task_dict = json.load(f)

    task_name = task_dict['task_name']
    task_description = task_dict['Definition'][0]
    icl_examples = task_dict['examples'][:100]
    test_samples = task_dict['test_samples']

    # Pick the first unused versioned filename so reruns never overwrite
    # earlier logs, then create the (empty) file.
    version = 1
    output_file = f'{log_path_prefix}openseek-{task_id}-v{version}.jsonl'
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    while os.path.exists(output_file):
        version += 1
        output_file = f'{log_path_prefix}openseek-{task_id}-v{version}.jsonl'
    with open(output_file, 'w'):
        pass

    examples_str = None
    for test_sample in tqdm(test_samples, desc=f'Evaluation on Task {task_id}: {task_name}'):
        test_record = {'test_sample_id': test_sample['id']}

        text2annotate = test_sample['input']
        prompt = build_prompt(task_description, text2annotate)
        if examples_str is None:
            # ICL examples are selected once (against the first test sample)
            # and reused for every subsequent prompt.
            examples_str = select_examples(icl_examples, task_description, text2annotate)
        input_prompt = prompt.replace("[[EXAMPLES]]\n\n", examples_str + '\n\n')

        test_record['prediction'] = annotate(input_prompt)
        # Append one record per sample so partial progress survives interruption.
        with open(output_file, 'a') as f:
            f.write(json.dumps(test_record) + '\n')

if __name__ == '__main__':
    # Script entry point: parse CLI args and load the Qwen tokenizer.
    # NOTE(review): no evaluate(...) call is visible here -- the old file
    # appears truncated in this view; confirm against the full source.
    args = parser_args()
    qwen_tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
import json, os, argparse
from tqdm import tqdm, trange
from transformers import AutoTokenizer

# from method import build_prompt, select_examples, annotate

from method import build_prompt, select_examples

# from method import annotate_nvidia as annotate # For Nvidia GPU
from method import annotate_ascend as annotate # For Huawei Ascend
from method import annotate_batch

# Data/output locations are overridable via environment variables so the
# script is portable across machines (previously hard-coded absolute
# /root/flagos/... paths, which only worked in one environment).
DATA_DIR = os.environ.get('OPENSEEK_DATA_DIR', './data')
OUTPUT_DIR = os.environ.get('OPENSEEK_OUTPUT_DIR', './outputs')
Comment on lines +13 to +14
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Hardcoded absolute paths (/root/flagos/...) make the script non-portable and environment-dependent. Consider using relative paths or environment variables to define DATA_DIR and OUTPUT_DIR.

Suggested change
DATA_DIR = '/root/flagos/OpenSeek/openseek/competition/LongContext-ICL-Annotation/data'
OUTPUT_DIR = '/root/flagos/OpenSeek/openseek/competition/LongContext-ICL-Annotation/outputs'
DATA_DIR = './data'
OUTPUT_DIR = './outputs'


# Task id (1-8) -> path of the JSON file holding that task's definition,
# ICL examples, and test samples. All files live under DATA_DIR.
_TASK_BASENAMES = (
    'openseek-1_closest_integers.json',
    'openseek-2_count_nouns_verbs.json',
    'openseek-3_collatz_conjecture.json',
    'openseek-4_conala_concat_strings.json',
    'openseek-5_semeval_2018_task1_tweet_sadness_detection.json',
    'openseek-6_mnli_same_genre_classification.json',
    'openseek-7_jeopardy_answer_generation_all.json',
    'openseek-8_kernel_generation.json',
)
TASK_FILES = {task_id: f'{DATA_DIR}/{basename}'
              for task_id, basename in enumerate(_TASK_BASENAMES, start=1)}

def parser_args():
    """Parse command-line arguments for the evaluation script.

    Returns:
        argparse.Namespace with task_id, max_input_length,
        log_path_prefix and tokenizer_path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--task_id', type=int, required=True,
                        # Fixed: help text previously said "[1, 7]" although
                        # TASK_FILES defines tasks 1 through 8.
                        help='Task ID to evaluate, should be in [1, 8].')
    parser.add_argument('--max_input_length', type=int, default=10_000,
                        help='Maximum input length for the model.')
    # Portable defaults: derive from OUTPUT_DIR / environment instead of
    # machine-specific absolute paths (review-flagged).
    parser.add_argument('--log_path_prefix', type=str,
                        default=f'{OUTPUT_DIR}/',
                        help='Prefix path to save the evaluation logs.')
    parser.add_argument('--tokenizer_path', type=str,
                        default=os.environ.get('TOKENIZER_PATH', 'Qwen/Qwen3-4B'),
                        help='HF model id or local path of the tokenizer.')
    args = parser.parse_args()
    return args

def _append_records(output_file: str, sample_ids: list, results: list) -> None:
    """Append one JSONL record per (sample_id, result) pair to output_file.

    The file is opened once per batch instead of once per record, avoiding
    repeated open/close I/O overhead (review-flagged).
    """
    with open(output_file, 'a') as f:
        for sid, (pred, _) in zip(sample_ids, results):
            f.write(json.dumps({'test_sample_id': sid, 'prediction': pred}) + '\n')


def evaluate(task_id: int,
             qwen_tokenizer: AutoTokenizer,
             max_input_length: int = 128_000,
             log_path_prefix: str = './outputs/'
             ) -> float:
    """Run batched ICL annotation for one task, logging predictions to JSONL.

    Args:
        task_id: Task to evaluate; must be in [1, 8] (keys of TASK_FILES).
        qwen_tokenizer: Tokenizer instance. NOTE(review): currently unused;
            kept for interface compatibility.
        max_input_length: Maximum model input length (currently unused).
        log_path_prefix: Directory prefix for the output JSONL log.
    """
    assert 1 <= task_id <= 8, \
        f"task_id should be in [1, 8], but got {task_id}."

    with open(TASK_FILES[task_id], 'r') as f:
        task_dict = json.load(f)

    task_name = task_dict['task_name']
    task_description = task_dict['Definition'][0]
    icl_examples = task_dict['examples'][:50]
    test_samples = task_dict['test_samples']

    # Pick the first unused versioned filename so reruns never overwrite
    # earlier logs, then create the (empty) file.
    version = 1
    output_file = f'{log_path_prefix}openseek-{task_id}-v{version}.jsonl'
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    while os.path.exists(output_file):
        version += 1
        output_file = f'{log_path_prefix}openseek-{task_id}-v{version}.jsonl'
    with open(output_file, 'w'):
        pass

    examples_str = None
    batch_size = 8
    prompts_batch = []
    sample_ids_batch = []

    # Task 8 is code generation: it needs more output tokens and different
    # post-processing than the short-answer tasks.
    max_tokens = 1024 if task_id == 8 else 256
    use_count_answer = task_id != 8

    for test_sample in tqdm(test_samples, desc=f'Evaluation on Task {task_id}: {task_name}'):
        text2annotate = test_sample['input']
        prompt = build_prompt(task_description, text2annotate, task_id=task_id)
        if examples_str is None:
            # ICL examples are selected once (against the first test sample)
            # and reused for every subsequent prompt.
            examples_str = select_examples(icl_examples, task_description, text2annotate,
                                           is_code_generation=(task_id == 8),
                                           use_task_aware=True, task_id=task_id,
                                           use_quality_filter=True, quality_threshold=0.5,
                                           use_diversity=False, use_similarity=False)
        prompts_batch.append(prompt.replace("[[EXAMPLES]]\n\n", examples_str + '\n\n'))
        sample_ids_batch.append(test_sample['id'])

        # Flush a full batch through the annotator.
        if len(prompts_batch) >= batch_size:
            results = annotate_batch(prompts_batch, num_workers=4, max_tokens=max_tokens,
                                     use_count_answer=use_count_answer, task_id=task_id)
            _append_records(output_file, sample_ids_batch, results)
            prompts_batch = []
            sample_ids_batch = []

    # Flush any remaining (partial) batch.
    if prompts_batch:
        results = annotate_batch(prompts_batch, num_workers=4, max_tokens=max_tokens,
                                 use_count_answer=use_count_answer, task_id=task_id)
        _append_records(output_file, sample_ids_batch, results)
Comment on lines +109 to +112
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Similar to the batch processing loop, opening the file for each remaining sample is inefficient. Consider opening the file once to write the remaining results.

Suggested change
for sid, (pred, _) in zip(sample_ids_batch, results):
test_record = {'test_sample_id': sid, 'prediction': pred}
with open(output_file, 'a') as f:
f.write(json.dumps(test_record)+'\n')
with open(output_file, 'a') as f:
for sid, (pred, _) in zip(sample_ids_batch, results):
test_record = {'test_sample_id': sid, 'prediction': pred}
f.write(json.dumps(test_record) + '\n')


if __name__ == '__main__':
    # Script entry point: parse CLI options, load the tokenizer, then run
    # the requested task end-to-end.
    cli_args = parser_args()
    tokenizer = AutoTokenizer.from_pretrained(cli_args.tokenizer_path)
    evaluate(cli_args.task_id, tokenizer, cli_args.max_input_length, cli_args.log_path_prefix)
Loading