Skip to content

Commit 898be00

Browse files
felipemello1 (Felipe Mello)
and co-authors authored
make dapo default (#736)
Co-authored-by: Felipe Mello <felipemello@fb.com>
1 parent b288ff4 commit 898be00

File tree

4 files changed

+34
-30
lines changed

4 files changed

+34
-30
lines changed

apps/grpo/llama3_8b.yaml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
# >>> python -m apps.grpo.main --config apps/grpo/llama3_8b.yaml
33

44
# Global configuration
5-
group_size: 4
6-
local_batch_size: 4 # per-device batch size
5+
group_size: 8
6+
local_batch_size: 8 # per-device batch size
77
max_req_tokens: 1024
88
max_res_tokens: 2048
99
model: "meta-llama/Meta-Llama-3.1-8B-Instruct"
@@ -91,7 +91,7 @@ replay_buffer:
9191
# 2 GPUs for the trainer and we're using full FSDP.
9292
dp_size: 2
9393

94-
# Reference model configuration
94+
# Reference model configuration (if enabled in services)
9595
ref_model:
9696
model:
9797
name: llama3
@@ -117,16 +117,19 @@ ref_model:
117117

118118
# All resource allocations
119119
services:
120+
# Ref model is only necessary if the loss requires it (GRPO with beta>0),
121+
# but we recommend using DAPO instead
122+
# ref_model:
123+
# ref_model:
124+
# procs: 1
125+
# num_replicas: 1
126+
# with_gpus: true
127+
# mesh_name: ref_model
120128
generator:
121129
procs: ${generator.engine_args.tensor_parallel_size}
122130
num_replicas: 1
123131
with_gpus: true
124132
mesh_name: generator
125-
ref_model:
126-
procs: 1
127-
num_replicas: 1
128-
with_gpus: true
129-
mesh_name: ref_model
130133
reward_actor:
131134
procs: 1
132135
num_replicas: 1

apps/grpo/main.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from forge.observability.metrics import record_metric, Reduce
2525
from forge.observability.perf_tracker import Tracer
2626
from forge.rl import collate, ComputeAdvantages, Episode, RewardActor
27-
from forge.rl.loss import GRPOLoss
27+
from forge.rl.loss import DAPOLoss, GRPOLoss
2828
from forge.types import LauncherConfig, ProvisionerConfig
2929
from forge.util.checkpoint import drop_weights
3030
from forge.util.config import parse
@@ -68,12 +68,7 @@ async def main(cfg: DictConfig):
6868
)
6969

7070
# ---- Setup loss function ---- #
71-
loss_fn = GRPOLoss(
72-
clip_low=0.2,
73-
clip_high=0.28,
74-
beta=0.1,
75-
agg_type="fixed_horizon",
76-
)
71+
loss_fn = DAPOLoss()
7772

7873
# Fail-fast: Check loss/ref_model compatibility before spawning actors
7974
uses_ref_model = cfg.get("services", {}).get("ref_model") is not None

apps/grpo/qwen3_1_7b.yaml

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# Global configuration
55
group_size: 8
6-
local_batch_size: 16 # per-device batch size
6+
local_batch_size: 8 # per-device batch size
77
max_req_tokens: 1024
88
max_res_tokens: 2048
99
model: "Qwen/Qwen3-1.7B"
@@ -92,7 +92,7 @@ replay_buffer:
9292
max_policy_age: ${off_by_n}
9393
dp_size: ${trainer.parallelism.data_parallel_shard_degree} # Must equal trainer DP degree
9494

95-
# Reference model configuration
95+
# Reference model configuration (if enabled in services)
9696
ref_model:
9797
model:
9898
name: qwen3
@@ -118,16 +118,19 @@ ref_model:
118118

119119
# All resource allocations
120120
services:
121+
# Ref model is only necessary if the loss requires it (GRPO with beta>0),
122+
# but we recommend using DAPO instead
123+
# ref_model:
124+
# ref_model:
125+
# procs: 1
126+
# num_replicas: 1
127+
# with_gpus: true
128+
# mesh_name: ref_model
121129
generator:
122130
procs: ${generator.engine_args.tensor_parallel_size}
123131
num_replicas: 1
124132
mesh_name: generator
125133
with_gpus: true
126-
ref_model:
127-
procs: 1
128-
num_replicas: 1
129-
mesh_name: ref_model
130-
with_gpus: true
131134
reward_actor:
132135
procs: 1
133136
num_replicas: 1

apps/grpo/qwen3_8b.yaml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
# >>> python -m apps.grpo.main --config apps/grpo/qwen3_8b.yaml
33

44
# Global configuration
5-
group_size: 16
6-
local_batch_size: 4 # per-device batch size
5+
group_size: 8
6+
local_batch_size: 8 # per-device batch size
77
max_req_tokens: 1024
88
max_res_tokens: 2048
99
model: "Qwen/Qwen3-8B"
@@ -91,7 +91,7 @@ replay_buffer:
9191
# 2 GPUs for the trainer and we're using full FSDP.
9292
dp_size: 2
9393

94-
# Reference model configuration
94+
# Reference model configuration (if enabled in services)
9595
ref_model:
9696
model:
9797
name: qwen3
@@ -117,16 +117,19 @@ ref_model:
117117

118118
# All resource allocations
119119
services:
120+
# Ref model is only necessary if the loss requires it (GRPO with beta>0),
121+
# but we recommend using DAPO instead
122+
# ref_model:
123+
# ref_model:
124+
# procs: 1
125+
# num_replicas: 1
126+
# with_gpus: true
127+
# mesh_name: ref_model
120128
generator:
121129
procs: ${generator.engine_args.tensor_parallel_size}
122130
num_replicas: 1
123131
with_gpus: true
124132
mesh_name: generator
125-
ref_model:
126-
procs: 1
127-
num_replicas: 1
128-
with_gpus: true
129-
mesh_name: ref_model
130133
reward_actor:
131134
procs: 1
132135
num_replicas: 1

0 commit comments

Comments (0)