# d3LLM/eval_scripts/dream_math.sh (hao-ai-lab/d3LLM, main branch)
# Evaluation commands for the minerva_math task.

# Qwen2.5-7B-Instruct, minerva_math
# Runs through the `hf` model backend: 4-shot, batch size 32, sampling
# disabled (do_sample=False), at most 256 generated tokens per sample.
export CUDA_VISIBLE_DEVICES="0,1,2,3"
export HF_ALLOW_CODE_EVAL=1
# The launch is chained onto `cd` with `&&` so that a missing checkout cannot
# start the evaluation from the wrong directory (--output_path is relative).
# ${PYTHONPATH:+:...} appends the previous value only when one exists, which
# avoids a dangling ':' (an empty search-path entry) when PYTHONPATH is unset.
cd ~/Codes/d3LLM/utils/lm-evaluation-harness &&
PYTHONPATH=~/Codes/d3LLM/utils/lm-evaluation-harness${PYTHONPATH:+:$PYTHONPATH} \
accelerate launch -m lm_eval \
    --model hf \
    --model_args "pretrained=Qwen/Qwen2.5-7B-Instruct,temperature=0.0" \
    --tasks minerva_math \
    --num_fewshot 4 \
    --batch_size 32 \
    --output_path evals_results/minerva_math \
    --log_samples \
    --confirm_run_unsafe_code \
    --gen_kwargs do_sample=False,max_gen_toks=256

# Vanilla Dream, TPF=1.0:
# Dream-org/Dream-v0-Instruct-7B through the `diffllm` backend, zero-shot with
# the chat template applied; 256 diffusion steps for 256 new tokens.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args torch_compile=False,pretrained=Dream-org/Dream-v0-Instruct-7B,trust_remote_code=True,max_new_tokens=256,diffusion_steps=256,dtype=bfloat16,temperature=0.1,top_p=0.9,alg=entropy,dParallel=False \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/minerva_4 \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template


# Fast-dLLM Dream (dual cache):
# Same Dream-org/Dream-v0-Instruct-7B checkpoint, decoded with
# generation_method=Fast_dllm_v1, use_cache=True and dual_cache=True,
# alg=confidence_threshold at threshold=0.9, block_length=32.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args torch_compile=False,pretrained=Dream-org/Dream-v0-Instruct-7B,trust_remote_code=True,max_new_tokens=256,diffusion_steps=8,dtype=bfloat16,temperature=0.,alg=confidence_threshold,threshold=0.9,generation_method=Fast_dllm_v1,use_cache=True,dual_cache=True,block_length=32 \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/fast_dllm_dual_cache \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template


# dParallel-Dream, TPF=1.0:
# Zigeng/dParallel_Dream_7B_Instruct with dParallel=False and alg=entropy;
# 256 diffusion steps for 256 new tokens, zero-shot with the chat template.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args torch_compile=False,pretrained=Zigeng/dParallel_Dream_7B_Instruct,trust_remote_code=True,max_new_tokens=256,diffusion_steps=256,dtype=bfloat16,temperature=0.1,top_p=0.9,alg=entropy,dParallel=False \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/minerva_4 \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template


# dParallel-Dream, entropy-threshold=0.45:
# Zigeng/dParallel_Dream_7B_Instruct with dParallel=True,
# alg=entropy_threshold and threshold=0.45, temperature 0.
# NOTE(review): this --output_path is also used by other runs in this file;
# confirm the results of different configurations cannot be mixed up.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args torch_compile=False,pretrained=Zigeng/dParallel_Dream_7B_Instruct,trust_remote_code=True,max_new_tokens=256,diffusion_steps=256,dtype=bfloat16,temperature=0.,alg=entropy_threshold,dParallel=True,threshold=0.45 \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/minerva_4 \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template


# d3LLM-Dream, TPF=1.0:
# d3LLM/d3LLM_Dream with dParallel=False and alg=entropy; 256 diffusion steps
# for 256 new tokens, zero-shot with the chat template.
# NOTE(review): unlike most other diffllm runs in this file, torch_compile=False
# is not passed here — confirm that is intended.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args pretrained=d3LLM/d3LLM_Dream,trust_remote_code=True,max_new_tokens=256,diffusion_steps=256,dtype=bfloat16,temperature=0.1,top_p=0.9,alg=entropy,dParallel=False \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/minerva_4 \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template


# d3LLM-Dream: generation_method=generation_multi_block (no delay):
# d3LLM/d3LLM_Dream with alg=entropy_threshold at threshold=0.4,
# block_length=32 and cache_delay_iter=10000.
# NOTE(review): unlike most other diffllm runs in this file, torch_compile=False
# is not passed here — confirm that is intended.
# NOTE(review): this --output_path is also used by other runs in this file;
# confirm the results of different configurations cannot be mixed up.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args pretrained=d3LLM/d3LLM_Dream,trust_remote_code=True,max_new_tokens=256,diffusion_steps=256,dtype=bfloat16,temperature=0.,alg=entropy_threshold,dParallel=False,threshold=0.4,generation_method=generation_multi_block,block_add_threshold=0.1,decoded_token_threshold=0.95,block_length=32,cache_delay_iter=10000 \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/minerva_4 \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template


# d3LLM-Dream: generation_method=generation_multi_block, delay=2:
# d3LLM/d3LLM_Dream with alg=entropy_threshold at threshold=0.4,
# block_length=32, cache_delay_iter=2, refresh_interval=10000 and
# early_stop=True.
# NOTE(review): this --output_path is also used by other runs in this file;
# confirm the results of different configurations cannot be mixed up.
# `&&` keeps the launch from running in the wrong directory if `cd` fails
# (the output path is relative to the working directory).
cd ~/Codes/d3LLM/utils/utils_Dream/eval_instruct &&
accelerate launch --main_process_port 12334 -m lm_eval \
    --model diffllm \
    --model_args torch_compile=False,pretrained=d3LLM/d3LLM_Dream,trust_remote_code=True,max_new_tokens=256,diffusion_steps=256,dtype=bfloat16,temperature=0.,alg=entropy_threshold,dParallel=False,threshold=0.4,generation_method=generation_multi_block,block_add_threshold=0.1,decoded_token_threshold=0.95,block_length=32,cache_delay_iter=2,refresh_interval=10000,early_stop=True \
    --tasks minerva_math \
    --device cuda \
    --batch_size 1 \
    --num_fewshot 0 \
    --output_path ./eval_tmp/minerva_4 \
    --log_samples \
    --confirm_run_unsafe_code \
    --apply_chat_template