Skip to content

Commit 2ef00be

Browse files
Move slurm configs and scripts to experimental directory (#670)
1 parent 6a0e5a7 commit 2ef00be

File tree

6 files changed

+35
-16
lines changed

6 files changed

+35
-16
lines changed

experimental/slurm/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Running experiments on Slurm
2+
3+
To run GRPO on Slurm, please use the `submit.sh` script in this directory.
4+
5+
Usage (run from the repository root, because the scripts use repo-relative paths):
6+
```
7+
./experimental/slurm/submit.sh qwen3_8b
8+
./experimental/slurm/submit.sh qwen3_32b
9+
./experimental/slurm/submit.sh qwen3_30b_a3b
10+
```
Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Group Relative Policy Optimization (GRPO)
22
# NOTE: This config has not been tested for correctness yet! Testing so far has covered only infrastructure stability.
3-
# ./apps/grpo/slurm/submit.sh qwen3_30b_a3b
3+
# ./experimental/slurm/submit.sh qwen3_30b_a3b
44

55
# Global configuration
66
group_size: 4
@@ -23,7 +23,7 @@ rollout_threads: 32 # make this 4x the number of policy replicas seems to work w
2323
# Observability configuration
2424
metric_logging:
2525
wandb:
26-
entity: agentic-models
26+
entity: torchforge
2727
project: grpo-training
2828
group: grpo_exp_${oc.env:USER}
2929
logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
@@ -83,21 +83,23 @@ trainer:
8383
disable_loss_parallel: true
8484
checkpoint:
8585
enable: true
86-
folder: ./checkpoint # The folder to save checkpoints to.
87-
initial_load_path: hf://${model} # The path to load the initial checkpoint from. Ignored if `folder` exists.
88-
initial_load_in_hf: true # If true, interpret initial_load_path as a HuggingFace model repo
86+
initial_load_path: hf://${model}
87+
initial_load_in_hf: true
88+
folder: ./checkpoint
8989
last_save_in_hf: true
9090
interval: 500
9191
async_mode: "disabled"
9292
activation_checkpoint:
9393
mode: full
94+
comm:
95+
# The 30B MoE model can take longer to load its checkpoint than the 1200s allowed for the 1.7B model.
96+
init_timeout_seconds: 1800
9497

9598
# Replay buffer configuration
9699
replay_buffer:
97100
batch_size: ${local_batch_size}
98101
max_policy_age: ${off_by_n}
99-
# dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
100-
dp_size: 4
102+
dp_size: ${actors.trainer.procs}
101103

102104
# Reference model configuration
103105
ref_model:
@@ -122,6 +124,8 @@ ref_model:
122124
enable: true
123125
initial_load_path: hf://${model}
124126
initial_load_in_hf: true
127+
comm:
128+
init_timeout_seconds: 1800
125129

126130
# All resource allocations
127131
services:
@@ -134,6 +138,7 @@ services:
134138
ref_model:
135139
procs: 4
136140
num_replicas: 1
141+
hosts: 1
137142
with_gpus: true
138143
mesh_name: ref_model
139144
reward_actor:
@@ -148,7 +153,7 @@ actors:
148153
with_gpus: false
149154
mesh_name: dataset
150155
trainer:
151-
procs: 4
156+
procs: 8
152157
hosts: 1
153158
with_gpus: true
154159
mesh_name: trainer
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Group Relative Policy Optimization (GRPO)
22
# NOTE: This config has not been tested for correctness yet! Testing so far has covered only infrastructure stability.
3-
# ./apps/grpo/slurm/submit.sh qwen3_32b
3+
# ./experimental/slurm/submit.sh qwen3_32b
44

55
# Global configuration
66
group_size: 16
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Group Relative Policy Optimization (GRPO)
2-
# ./apps/grpo/slurm/submit.sh qwen3_8b
2+
# ./experimental/slurm/submit.sh qwen3_8b
33

44
# Global configuration
55
group_size: 16
@@ -21,7 +21,7 @@ provisioner:
2121
# Observability configuration
2222
metric_logging:
2323
wandb:
24-
entity: agentic-models
24+
entity: torchforge
2525
project: grpo-training
2626
group: grpo_exp_${oc.env:USER}
2727
logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
@@ -89,6 +89,8 @@ trainer:
8989
activation_checkpoint:
9090
mode: selective
9191
selective_ac_option: op
92+
comm:
93+
init_timeout_seconds: 1800
9294

9395
# Replay buffer configuration
9496
replay_buffer:
@@ -121,6 +123,8 @@ ref_model:
121123
checkpoint:
122124
initial_load_path: hf://${model}
123125
initial_load_in_hf: true
126+
comm:
127+
init_timeout_seconds: 1800
124128

125129
# All resource allocations
126130
services:
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ CONFIG_NAME="${1}"
99

1010
sbatch --job-name="${CONFIG_NAME}" \
1111
--export=ALL,CONFIG_NAME="${CONFIG_NAME}" \
12-
apps/grpo/slurm/submit_grpo.sh
12+
experimental/slurm/submit_grpo.sh
1313

1414

1515
# Usage:
16-
# ./apps/grpo/slurm/submit.sh qwen3_8b
17-
# ./apps/grpo/slurm/submit.sh qwen3_32b
18-
# ./apps/grpo/slurm/submit.sh qwen3_30b_a3b
16+
# ./experimental/slurm/submit.sh qwen3_8b
17+
# ./experimental/slurm/submit.sh qwen3_32b
18+
# ./experimental/slurm/submit.sh qwen3_30b_a3b
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ export TORCHSTORE_RDMA_ENABLED=0
2626

2727
cd /storage/home/$USER/torchforge
2828

29-
srun python -m apps.grpo.main --config apps/grpo/slurm/${CONFIG_NAME}.yaml
29+
srun python -m apps.grpo.main --config experimental/slurm/${CONFIG_NAME}.yaml

0 commit comments

Comments
 (0)