File tree Expand file tree Collapse file tree 4 files changed +34
-30
lines changed
Expand file tree Collapse file tree 4 files changed +34
-30
lines changed Original file line number Diff line number Diff line change 22# >>> python -m apps.grpo.main --config apps/grpo/llama3_8b.yaml
33
44# Global configuration
5- group_size : 4
6- local_batch_size : 4 # per-device batch size
5+ group_size : 8
6+ local_batch_size : 8 # per-device batch size
77max_req_tokens : 1024
88max_res_tokens : 2048
99model : " meta-llama/Meta-Llama-3.1-8B-Instruct"
@@ -91,7 +91,7 @@ replay_buffer:
9191 # 2 GPUs for the trainer and we're using full FSDP.
9292 dp_size : 2
9393
94- # Reference model configuration
94+ # Reference model configuration (if enabled in services)
9595ref_model :
9696 model :
9797 name : llama3
@@ -117,16 +117,19 @@ ref_model:
117117
118118# All resource allocations
119119services :
120+ # Ref model is only necessary if the loss requires it (GRPO with beta>0),
121+ # but we recommend using DAPO instead
122+ # ref_model:
124+ # procs: 1
125+ # num_replicas: 1
126+ # with_gpus: true
127+ # mesh_name: ref_model
120128 generator :
121129 procs : ${generator.engine_args.tensor_parallel_size}
122130 num_replicas : 1
123131 with_gpus : true
124132 mesh_name : generator
125- ref_model :
126- procs : 1
127- num_replicas : 1
128- with_gpus : true
129- mesh_name : ref_model
130133 reward_actor :
131134 procs : 1
132135 num_replicas : 1
Original file line number Diff line number Diff line change 2424from forge .observability .metrics import record_metric , Reduce
2525from forge .observability .perf_tracker import Tracer
2626from forge .rl import collate , ComputeAdvantages , Episode , RewardActor
27- from forge .rl .loss import GRPOLoss
27+ from forge .rl .loss import DAPOLoss , GRPOLoss
2828from forge .types import LauncherConfig , ProvisionerConfig
2929from forge .util .checkpoint import drop_weights
3030from forge .util .config import parse
@@ -68,12 +68,7 @@ async def main(cfg: DictConfig):
6868 )
6969
7070 # ---- Setup loss function ---- #
71- loss_fn = GRPOLoss (
72- clip_low = 0.2 ,
73- clip_high = 0.28 ,
74- beta = 0.1 ,
75- agg_type = "fixed_horizon" ,
76- )
71+ loss_fn = DAPOLoss ()
7772
7873 # Fail-fast: Check loss/ref_model compatibility before spawning actors
7974 uses_ref_model = cfg .get ("services" , {}).get ("ref_model" ) is not None
Original file line number Diff line number Diff line change 33
44# Global configuration
55group_size : 8
6- local_batch_size : 16 # per-device batch size
6+ local_batch_size : 8 # per-device batch size
77max_req_tokens : 1024
88max_res_tokens : 2048
99model : " Qwen/Qwen3-1.7B"
@@ -92,7 +92,7 @@ replay_buffer:
9292 max_policy_age : ${off_by_n}
9393 dp_size : ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
9494
95- # Reference model configuration
95+ # Reference model configuration (if enabled in services)
9696ref_model :
9797 model :
9898 name : qwen3
@@ -118,16 +118,19 @@ ref_model:
118118
119119# All resource allocations
120120services :
121+ # Ref model is only necessary if the loss requires it (GRPO with beta>0),
122+ # but we recommend using DAPO instead
123+ # ref_model:
125+ # procs: 1
126+ # num_replicas: 1
127+ # with_gpus: true
128+ # mesh_name: ref_model
121129 generator :
122130 procs : ${generator.engine_args.tensor_parallel_size}
123131 num_replicas : 1
124132 mesh_name : generator
125133 with_gpus : true
126- ref_model :
127- procs : 1
128- num_replicas : 1
129- mesh_name : ref_model
130- with_gpus : true
131134 reward_actor :
132135 procs : 1
133136 num_replicas : 1
Original file line number Diff line number Diff line change 22# >>> python -m apps.grpo.main --config apps/grpo/qwen3_8b.yaml
33
44# Global configuration
5- group_size : 16
6- local_batch_size : 4 # per-device batch size
5+ group_size : 8
6+ local_batch_size : 8 # per-device batch size
77max_req_tokens : 1024
88max_res_tokens : 2048
99model : " Qwen/Qwen3-8B"
@@ -91,7 +91,7 @@ replay_buffer:
9191 # 2 GPUs for the trainer and we're using full FSDP.
9292 dp_size : 2
9393
94- # Reference model configuration
94+ # Reference model configuration (if enabled in services)
9595ref_model :
9696 model :
9797 name : qwen3
@@ -117,16 +117,19 @@ ref_model:
117117
118118# All resource allocations
119119services :
120+ # Ref model is only necessary if the loss requires it (GRPO with beta>0),
121+ # but we recommend using DAPO instead
122+ # ref_model:
124+ # procs: 1
125+ # num_replicas: 1
126+ # with_gpus: true
127+ # mesh_name: ref_model
120128 generator :
121129 procs : ${generator.engine_args.tensor_parallel_size}
122130 num_replicas : 1
123131 with_gpus : true
124132 mesh_name : generator
125- ref_model :
126- procs : 1
127- num_replicas : 1
128- with_gpus : true
129- mesh_name : ref_model
130133 reward_actor :
131134 procs : 1
132135 num_replicas : 1
You can’t perform that action at this time.
0 commit comments