d3LLM/eval_scripts/llada_humaneval.sh at main · hao-ai-lab/d3LLM · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# Qwen2.5-7B-Instruct, humaneval
export CUDA_VISIBLE_DEVICES="0,1,2,3"
export HF_ALLOW_CODE_EVAL=1
cd ~/Codes/d3LLM/utils/lm-evaluation-harness
PYTHONPATH=~/Codes/d3LLM/utils/lm-evaluation-harness:$PYTHONPATH \
accelerate launch -m lm_eval \
    --model hf \
    --model_args "pretrained=Qwen/Qwen2.5-7B-Instruct,temperature=0.0" \
    --tasks humaneval \
    --num_fewshot 0 \
    --batch_size 32 \
    --output_path evals_results/humaneval \
    --log_samples \
    --confirm_run_unsafe_code \
    --gen_kwargs do_sample=False,max_gen_toks=256
latest_jsonl=$(find evals_results/humaneval -name "samples_humaneval_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python ~/Codes/d3LLM/utils/utils_LLaDA/postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"


cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
block_length=32
num_fewshot=0
steps=256

# 1. GSAI-ML/LLaDA-8B-Instruct, TPF=1.0
model_path='GSAI-ML/LLaDA-8B-Instruct'
output_dir='GSAI-ML'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 29600 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},gen_length=${length},steps=${steps},block_length=${block_length},show_speed=True,task="${task}" \
--output_path evals_results/${output_dir}/${task}-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"

# 2. GSAI-ML/LLaDA-8B-Instruct, Fast-dLLM
rm -rf null/
model_path='GSAI-ML/LLaDA-8B-Instruct'
output_dir='GSAI-ML'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 12335 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},steps=8,gen_length=${length},block_length=${block_length},remasking=low_confidence,threshold=0.9,save_dir=null,show_speed=True,task=${task},generation_method=Fast_dllm_v1,use_cache=True,dual_cache=True \
--output_path evals_results/${output_dir}/${task}-fast-dllm-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-fast-dllm-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"

# 3. Zigeng/dParallel-LLaDA-8B-instruct, TPF=1.0
cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
block_length=32
num_fewshot=0
steps=256
model_path='Zigeng/dParallel-LLaDA-8B-instruct'
output_dir='Zigeng'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 29600 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},gen_length=${length},steps=${steps},block_length=${block_length},show_speed=True,task="${task}" \
--output_path evals_results/${output_dir}/${task}-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"

# 4. Zigeng/dParallel-LLaDA-8B-instruct, entropy-threshold
cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
block_length=32
num_fewshot=0
steps=256
model_path='Zigeng/dParallel-LLaDA-8B-instruct'
output_dir='Zigeng'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 29601 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},gen_length=${length},steps=${steps},block_length=${block_length},show_speed=True,threshold=0.5,task="${task}" \
--output_path evals_results/${output_dir}/${task}-entropy-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-entropy-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"


# D2F TPF = 1.0
cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
num_fewshot=0
output_dir='d2f'
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
    --main_process_port 29521 \
    --num_processes 4 \
    eval_d2f.py \
    --model dream_lora \
    --model_args pretrained=GSAI-ML/LLaDA-8B-Instruct,lora_path=SJTU-Deng-Lab/D2F_LLaDA_Instruct_8B_Lora,generation_method=generate,max_new_tokens=256,diffusion_steps=256,block_size=32,add_bos_token=true,temperature=0.0,remasking=low_confidence,dtype=bfloat16,save_dir=eval_tmp \
    --tasks ${task} \
    --num_fewshot ${num_fewshot} \
    --batch_size 1 \
    --output_path evals_results/${output_dir}/${task}-tpf1.0-ns${num_fewshot}-${length} \
    --log_samples \
    --confirm_run_unsafe_code
latest_jsonl=$(find evals_results/${output_dir}/${task}-tpf1.0-ns${num_fewshot}-${length} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"


# D2F
cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
num_fewshot=0
output_dir='d2f'
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
    --main_process_port 29520 \
    --num_processes 4 \
    eval_d2f.py \
    --model dream_lora \
    --model_args pretrained=GSAI-ML/LLaDA-8B-Instruct,lora_path=SJTU-Deng-Lab/D2F_LLaDA_Instruct_8B_Lora,max_new_tokens=256,diffusion_steps=256,temperature=0,add_bos_token=true,escape_until=true,block_size=32,block_add_threshold=0.1,skip_threshold=0.9,decoded_token_threshold=0.95,dtype=bfloat16,sampling_strategy=default,save_dir=eval_tmp \
    --tasks ${task} \
    --num_fewshot ${num_fewshot} \
    --batch_size 1 \
    --output_path evals_results/${output_dir}/${task}-ns${num_fewshot}-${length} \
    --log_samples \
    --confirm_run_unsafe_code
latest_jsonl=$(find evals_results/${output_dir}/${task}-ns${num_fewshot}-${length} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"


# d3LLM-LLaDA, TPF=1.0
model_path='d3LLM/d3LLM_LLaDA'
output_dir='d3llm'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 29600 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},gen_length=${length},steps=${steps},block_length=${block_length},show_speed=True,task="${task}" \
--output_path evals_results/${output_dir}/${task}-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"


# d3LLM-LLaDA, generate_multi_block (block_add_threshold=1.0, equal to entropy-threshold decoding)
cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
block_length=32
num_fewshot=0
steps=256
model_path='d3LLM/d3LLM_LLaDA'
output_dir='d3llm'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 29601 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},gen_length=${length},steps=${steps},block_length=${block_length},show_speed=True,threshold=0.5,task="${task}" \
--output_path evals_results/${output_dir}/${task}-entropy-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-entropy-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"


# d3LLM-LLaDA, generate_multi_block_kv_cache, delay=2:
cd ~/Codes/d3LLM/utils/utils_LLaDA
export HF_ALLOW_CODE_EVAL=1
export HF_DATASETS_TRUST_REMOTE_CODE=true
task=humaneval
length=256
block_length=32
num_fewshot=0
steps=256
model_path='d3LLM/d3LLM_LLaDA'
output_dir='d3llm'
METHOD_NAME_ENCODED=$(echo "${model_path}" | sed 's|/|__|g')
accelerate launch --main_process_port 29601 eval_llada.py --tasks ${task} --num_fewshot ${num_fewshot} \
--confirm_run_unsafe_code --model llada_dist \
--model_args model_path=${model_path},gen_length=${length},steps=${steps},block_length=${block_length},show_speed=True,threshold=0.5,task="${task}",generation_method="generate_multi_block_kv_cache",cache_delay_iter=2,refresh_interval=4,block_add_threshold=1.0,decoded_token_threshold=1.0,block_length=32,early_stop=True \
--output_path evals_results/${output_dir}/${task}-multi-block-ns${num_fewshot}-${length} --log_samples
latest_jsonl=$(find evals_results/${output_dir}/${task}-multi-block-ns${num_fewshot}-${length}/${METHOD_NAME_ENCODED} -name "samples_${task}_*.jsonl" -type f 2>/dev/null | head -n 1)
[ -n "$latest_jsonl" ] && python postprocess_code_humaneval.py "$latest_jsonl" || echo "No jsonl file found"