Skip to content

Commit 5c93954

Browse files
committed
Add TREC.sh script for data processing and model training
1 parent 5be9c47 commit 5c93954

File tree

1 file changed

+146
-0
lines changed

1 file changed

+146
-0
lines changed

run/TREC.sh

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# Experiment configuration shared by the stages below.
# Dataset name and the model used by the fit/predict/eval/fuse/aggregate
# stages (the sparse_retrieve stage passes model=BM25 instead).
data="TREC"
model="RetrieverBERT"

# Forwarded as data.text_max_length / data.label_max_length.
text_max_length="256"
label_max_length="256"

# Forwarded as data.label_enhancement / data.text_features_source.
label_enhancement="LLM"
text_features_source="TXT"
9+
10+
## sparse_retrieve
11+
#######################################
# Run the sparse_retrieve task with model=BM25 once per fold and record the
# wall-clock start/end timestamps of each run.
# Globals:   data, text_features_source (read)
# Arguments: $1, $2 - operands handed to seq to enumerate the fold indices
#            (first and last fold when both are given)
# Outputs:   resource/time/sparse_retrieve_<data>_<fold>.tmr holding
#            "<start>,<end>"
#######################################
run_sparse_retrieve() {
  local fold_idx time_start time_end
  # Forward only the operands that were actually supplied, each as a single
  # word; the seq output itself is split into fold indices on purpose.
  for fold_idx in $(seq ${1:+"$1"} ${2:+"$2"}); do
    # The redirect at the end of the iteration fails if this directory is
    # missing, so make sure it exists before timing starts.
    mkdir -p resource/time
    time_start=$(date '+%Y-%m-%d %H:%M:%S')
    # Overrides are quoted so "[...]" is never treated as a glob pattern.
    python main.py \
      "tasks=[sparse_retrieve]" \
      "model=BM25" \
      "data=$data" \
      "data.text_features_source=$text_features_source" \
      "data.folds=[$fold_idx]"
    time_end=$(date '+%Y-%m-%d %H:%M:%S')
    # Written regardless of the python exit status, as before.
    echo "$time_start,$time_end" > "resource/time/sparse_retrieve_${data}_${fold_idx}.tmr"
  done
}

run_sparse_retrieve "$@"
23+
24+
# prompt_opt (uncomment to run — requires vLLM server on localhost:8001)
25+
#time_start=$(date '+%Y-%m-%d %H:%M:%S')
26+
#python main.py \
27+
# tasks=[prompt_opt] \
28+
# data=$data \
29+
# data.text_features_source=$text_features_source
30+
#time_end=$(date '+%Y-%m-%d %H:%M:%S')
31+
#echo "$time_start,$time_end" > resource/time/prompt_opt_${data}.tmr
32+
33+
# label_desc (uncomment to run — requires vLLM server + optimized_prompt.txt)
34+
#for fold_idx in $(seq $1 $2);
35+
#do
36+
# time_start=$(date '+%Y-%m-%d %H:%M:%S')
37+
# python main.py \
38+
# tasks=[label_desc] \
39+
# data=$data \
40+
# data.text_features_source=$text_features_source \
41+
# data.folds=[$fold_idx]
42+
# time_end=$(date '+%Y-%m-%d %H:%M:%S')
43+
# echo "$time_start,$time_end" > resource/time/label_desc_${data}_${fold_idx}.tmr
44+
#done
45+
46+
# dense_retrieve fit
47+
#######################################
# Run the fit task for the configured model once per fold and record the
# wall-clock start/end timestamps of each run.
# Globals:   data, model, text_max_length, label_max_length,
#            label_enhancement, text_features_source (read)
# Arguments: $1, $2 - operands handed to seq to enumerate the fold indices
#            (first and last fold when both are given)
# Outputs:   resource/time/fit_LLM_<model>_<data>_<fold>.tmr holding
#            "<start>,<end>"
#######################################
run_dense_fit() {
  local fold_idx time_start time_end
  # Forward only the operands that were actually supplied, each as a single
  # word; the seq output itself is split into fold indices on purpose.
  for fold_idx in $(seq ${1:+"$1"} ${2:+"$2"}); do
    # The redirect at the end of the iteration fails if this directory is
    # missing, so make sure it exists before timing starts.
    mkdir -p resource/time
    time_start=$(date '+%Y-%m-%d %H:%M:%S')
    # Overrides are quoted so "[...]" is never treated as a glob pattern.
    python main.py \
      "tasks=[fit]" \
      "trainer.max_epochs=5" \
      "trainer.patience=3" \
      "model=$model" \
      "model.name=LLM_${model}" \
      "data=$data" \
      "data.text_max_length=$text_max_length" \
      "data.label_max_length=$label_max_length" \
      "data.label_enhancement=$label_enhancement" \
      "data.text_features_source=$text_features_source" \
      "data.batch_size=128" \
      "data.num_workers=12" \
      "data.folds=[$fold_idx]"
    time_end=$(date '+%Y-%m-%d %H:%M:%S')
    # Written regardless of the python exit status, as before.
    echo "$time_start,$time_end" > "resource/time/fit_LLM_${model}_${data}_${fold_idx}.tmr"
  done
}

run_dense_fit "$@"
67+
68+
# dense_retrieve predict
69+
#######################################
# Run the predict task for the configured model once per fold and record the
# wall-clock start/end timestamps of each run.
# Globals:   data, model, text_max_length, label_max_length,
#            label_enhancement, text_features_source (read)
# Arguments: $1, $2 - operands handed to seq to enumerate the fold indices
#            (first and last fold when both are given)
# Outputs:   resource/time/predict_LLM_<model>_<data>_<fold>.tmr holding
#            "<start>,<end>"
#######################################
run_dense_predict() {
  local fold_idx time_start time_end
  # Forward only the operands that were actually supplied, each as a single
  # word; the seq output itself is split into fold indices on purpose.
  for fold_idx in $(seq ${1:+"$1"} ${2:+"$2"}); do
    # The redirect at the end of the iteration fails if this directory is
    # missing, so make sure it exists before timing starts.
    mkdir -p resource/time
    time_start=$(date '+%Y-%m-%d %H:%M:%S')
    # Overrides are quoted so "[...]" is never treated as a glob pattern.
    python main.py \
      "tasks=[predict]" \
      "trainer.max_epochs=5" \
      "trainer.patience=3" \
      "model=$model" \
      "model.name=LLM_${model}" \
      "data=$data" \
      "data.text_max_length=$text_max_length" \
      "data.label_max_length=$label_max_length" \
      "data.label_enhancement=$label_enhancement" \
      "data.text_features_source=$text_features_source" \
      "data.batch_size=128" \
      "data.num_workers=12" \
      "data.folds=[$fold_idx]"
    time_end=$(date '+%Y-%m-%d %H:%M:%S')
    # Written regardless of the python exit status, as before.
    echo "$time_start,$time_end" > "resource/time/predict_LLM_${model}_${data}_${fold_idx}.tmr"
  done
}

run_dense_predict "$@"
89+
90+
# dense_retrieve eval
91+
#######################################
# Run the eval task for the configured model once per fold and record the
# wall-clock start/end timestamps of each run.
# Globals:   data, model, text_max_length, label_max_length,
#            label_enhancement, text_features_source (read)
# Arguments: $1, $2 - operands handed to seq to enumerate the fold indices
#            (first and last fold when both are given)
# Outputs:   resource/time/eval_LLM_<model>_<data>_<fold>.tmr holding
#            "<start>,<end>"
#######################################
run_dense_eval() {
  local fold_idx time_start time_end
  # Forward only the operands that were actually supplied, each as a single
  # word; the seq output itself is split into fold indices on purpose.
  for fold_idx in $(seq ${1:+"$1"} ${2:+"$2"}); do
    # The redirect at the end of the iteration fails if this directory is
    # missing, so make sure it exists before timing starts.
    mkdir -p resource/time
    time_start=$(date '+%Y-%m-%d %H:%M:%S')
    # Overrides are quoted so "[...]" is never treated as a glob pattern.
    python main.py \
      "tasks=[eval]" \
      "trainer.max_epochs=5" \
      "trainer.patience=3" \
      "model=$model" \
      "model.name=LLM_${model}" \
      "data=$data" \
      "data.text_max_length=$text_max_length" \
      "data.label_max_length=$label_max_length" \
      "data.label_enhancement=$label_enhancement" \
      "data.text_features_source=$text_features_source" \
      "data.batch_size=128" \
      "data.num_workers=12" \
      "data.folds=[$fold_idx]"
    time_end=$(date '+%Y-%m-%d %H:%M:%S')
    # Written regardless of the python exit status, as before.
    echo "$time_start,$time_end" > "resource/time/eval_LLM_${model}_${data}_${fold_idx}.tmr"
  done
}

run_dense_eval "$@"
111+
112+
# fuse
113+
#######################################
# Run the fuse task for the configured model once per fold and record the
# wall-clock start/end timestamps of each run.
# Globals:   data, model, text_features_source (read)
# Arguments: $1, $2 - operands handed to seq to enumerate the fold indices
#            (first and last fold when both are given)
# Outputs:   resource/time/fuse_LLM_<model>_<data>_<fold>.tmr holding
#            "<start>,<end>"
#######################################
run_fuse() {
  local fold_idx time_start time_end
  # Forward only the operands that were actually supplied, each as a single
  # word; the seq output itself is split into fold indices on purpose.
  for fold_idx in $(seq ${1:+"$1"} ${2:+"$2"}); do
    # The redirect at the end of the iteration fails if this directory is
    # missing, so make sure it exists before timing starts.
    mkdir -p resource/time
    time_start=$(date '+%Y-%m-%d %H:%M:%S')
    # Overrides are quoted so "[...]" is never treated as a glob pattern.
    python main.py \
      "tasks=[fuse]" \
      "model=$model" \
      "model.name=LLM_${model}" \
      "data=$data" \
      "data.text_features_source=$text_features_source" \
      "data.folds=[$fold_idx]"
    time_end=$(date '+%Y-%m-%d %H:%M:%S')
    # Written regardless of the python exit status, as before.
    echo "$time_start,$time_end" > "resource/time/fuse_LLM_${model}_${data}_${fold_idx}.tmr"
  done
}

run_fuse "$@"
126+
127+
128+
# aggregate
129+
#######################################
# Run the aggregate task for the configured model once per fold and record
# the wall-clock start/end timestamps of each run.
# Globals:   data, model, text_max_length, label_max_length,
#            label_enhancement, text_features_source (read)
# Arguments: $1, $2 - operands handed to seq to enumerate the fold indices
#            (first and last fold when both are given)
# Outputs:   resource/time/aggregate_LLM_<model>_<data>_<fold>.tmr holding
#            "<start>,<end>"
#######################################
run_aggregate() {
  local fold_idx time_start time_end
  # Forward only the operands that were actually supplied, each as a single
  # word; the seq output itself is split into fold indices on purpose.
  for fold_idx in $(seq ${1:+"$1"} ${2:+"$2"}); do
    # The redirect at the end of the iteration fails if this directory is
    # missing, so make sure it exists before timing starts.
    mkdir -p resource/time
    time_start=$(date '+%Y-%m-%d %H:%M:%S')
    # Overrides are quoted so "[...]" is never treated as a glob pattern.
    python main.py \
      "tasks=[aggregate]" \
      "model=$model" \
      "model.name=LLM_${model}" \
      "data=$data" \
      "data.text_max_length=$text_max_length" \
      "data.label_max_length=$label_max_length" \
      "data.label_enhancement=$label_enhancement" \
      "data.text_features_source=$text_features_source" \
      "data.batch_size=128" \
      "data.num_workers=12" \
      "data.folds=[$fold_idx]"
    time_end=$(date '+%Y-%m-%d %H:%M:%S')
    # Written regardless of the python exit status, as before.
    echo "$time_start,$time_end" > "resource/time/aggregate_LLM_${model}_${data}_${fold_idx}.tmr"
  done
}

run_aggregate "$@"

0 commit comments

Comments
 (0)