Move slurm configs and scripts to experimental directory (#670)

daniellepintz · web-flow · commit 2ef00bee783e · 2025-12-22T18:26:06.000-05:00
diff --git a/experimental/slurm/README.md b/experimental/slurm/README.md
@@ -0,0 +1,10 @@
+# Running experiments on Slurm
+
+To run GRPO on Slurm, please use the `submit.sh` script in this directory.
+
+Usage (run from the repository root, since the scripts use repo-relative paths):
+```
+./experimental/slurm/submit.sh qwen3_8b
+./experimental/slurm/submit.sh qwen3_32b
+./experimental/slurm/submit.sh qwen3_30b_a3b
+```
diff --git a/experimental/slurm/qwen3_30b_a3b.yaml b/experimental/slurm/qwen3_30b_a3b.yaml
@@ -1,6 +1,6 @@
 # Grouped Relative Policy Optimization (GRPO)
 # NOTE - This has not been tested for correctness yet! All testing so far has been only for infrastructure stability
-# ./apps/grpo/slurm/submit.sh qwen3_30b_a3b
+# ./experimental/slurm/submit.sh qwen3_30b_a3b
 
 # Global configuration
 group_size: 4
@@ -23,7 +23,7 @@ rollout_threads: 32 # make this 4x the number of policy replicas seems to work w
 # Observability configuration
 metric_logging:
   wandb:
-    entity: agentic-models
+    entity: torchforge
     project: grpo-training
     group: grpo_exp_${oc.env:USER}
     logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
@@ -83,21 +83,23 @@ trainer:
     disable_loss_parallel: true
   checkpoint:
     enable: true
-    folder: ./checkpoint              # The folder to save checkpoints to.
-    initial_load_path: hf://${model}  # The path to load the initial checkpoint from. Ignored if `folder` exists.
-    initial_load_in_hf: true          # If true, interpret initial_load_path as a HuggingFace model repo
+    initial_load_path: hf://${model}
+    initial_load_in_hf: true
+    folder: ./checkpoint
     last_save_in_hf: true
     interval: 500
     async_mode: "disabled"
   activation_checkpoint:
     mode: full
+  comm:
+    # The 30B MoE model can need more time to load its checkpoint than the 1200s used for the 1.7B model
+    init_timeout_seconds: 1800
 
 # Replay buffer configuration
 replay_buffer:
   batch_size: ${local_batch_size}
   max_policy_age: ${off_by_n}
-  # dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
-  dp_size: 4
+  dp_size: ${actors.trainer.procs}
 
 # Reference model configuration
 ref_model:
@@ -122,6 +124,8 @@ ref_model:
     enable: true
     initial_load_path: hf://${model}
     initial_load_in_hf: true
+  comm:
+    init_timeout_seconds: 1800
 
 # All resource allocations
 services:
@@ -134,6 +138,7 @@ services:
   ref_model:
     procs: 4
     num_replicas: 1
+    hosts: 1
     with_gpus: true
     mesh_name: ref_model
   reward_actor:
@@ -148,7 +153,7 @@ actors:
     with_gpus: false
     mesh_name: dataset
   trainer:
-    procs: 4
+    procs: 8
     hosts: 1
     with_gpus: true
     mesh_name: trainer
diff --git a/experimental/slurm/qwen3_32b.yaml b/experimental/slurm/qwen3_32b.yaml
@@ -1,6 +1,6 @@
 # Grouped Relative Policy Optimization (GRPO)
 # NOTE - This has not been tested for correctness yet! All testing so far has been only for infrastructure stability
-# ./apps/grpo/slurm/submit.sh qwen3_32b
+# ./experimental/slurm/submit.sh qwen3_32b
 
 # Global configuration
 group_size: 16
diff --git a/experimental/slurm/qwen3_8b.yaml b/experimental/slurm/qwen3_8b.yaml
@@ -1,5 +1,5 @@
 # Grouped Relative Policy Optimization (GRPO)
-# ./apps/grpo/slurm/submit.sh qwen3_8b
+# ./experimental/slurm/submit.sh qwen3_8b
 
 # Global configuration
 group_size: 16
@@ -21,7 +21,7 @@ provisioner:
 # Observability configuration
 metric_logging:
   wandb:
-    entity: agentic-models
+    entity: torchforge
     project: grpo-training
     group: grpo_exp_${oc.env:USER}
     logging_mode: global_reduce # global_reduce, per_rank_reduce, per_rank_no_reduce
@@ -89,6 +89,8 @@ trainer:
   activation_checkpoint:
     mode: selective
     selective_ac_option: op
+  comm:
+    init_timeout_seconds: 1800
 
 # Replay buffer configuration
 replay_buffer:
@@ -121,6 +123,8 @@ ref_model:
   checkpoint:
     initial_load_path: hf://${model}
     initial_load_in_hf: true
+  comm:
+    init_timeout_seconds: 1800
 
 # All resource allocations
 services:
diff --git a/experimental/slurm/submit.sh b/experimental/slurm/submit.sh
@@ -9,10 +9,10 @@ CONFIG_NAME="${1}"
 
 sbatch --job-name="${CONFIG_NAME}" \
        --export=ALL,CONFIG_NAME="${CONFIG_NAME}" \
-       apps/grpo/slurm/submit_grpo.sh
+       experimental/slurm/submit_grpo.sh
 
 
 # Usage:
-# ./apps/grpo/slurm/submit.sh qwen3_8b
-# ./apps/grpo/slurm/submit.sh qwen3_32b
-# ./apps/grpo/slurm/submit.sh qwen3_30b_a3b
+# ./experimental/slurm/submit.sh qwen3_8b
+# ./experimental/slurm/submit.sh qwen3_32b
+# ./experimental/slurm/submit.sh qwen3_30b_a3b
diff --git a/experimental/slurm/submit_grpo.sh b/experimental/slurm/submit_grpo.sh
@@ -26,4 +26,4 @@ export TORCHSTORE_RDMA_ENABLED=0
 
 cd /storage/home/$USER/torchforge
 
-srun python -m apps.grpo.main --config apps/grpo/slurm/${CONFIG_NAME}.yaml
+srun python -m apps.grpo.main --config experimental/slurm/${CONFIG_NAME}.yaml

Original file line number	Diff line number	Diff line change
`@@ -26,4 +26,4 @@ export TORCHSTORE_RDMA_ENABLED=0`
`26`	`26`
`27`	`27`	`cd /storage/home/$USER/torchforge`
`28`	`28`
`29`		`-srun python -m apps.grpo.main --config apps/grpo/slurm/${CONFIG_NAME}.yaml`
	`29`	`+srun python -m apps.grpo.main --config experimental/slurm/${CONFIG_NAME}.yaml`