Skip to content

Commit 1ad7e7c

Browse files
Fix appending index to replicas on recovery (#747)
1 parent f1bedc1 commit 1ad7e7c

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

src/forge/controller/service/replica.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,16 @@ class Replica:
133133
# Metrics tracking
134134
metrics: ReplicaMetrics = field(default_factory=ReplicaMetrics)
135135

136+
_base_mesh_name: str | None = field(default=None, init=False)
137+
_mesh_name_initialized: bool = field(default=False, init=False)
138+
136139
def __post_init__(self):
137140
# This semaphore is used to enforce max_concurrent_requests
138141
# Once it is acquired max_concurrent_requests times, future
139142
# requests are blocked until standing requests complete.
140143
self._capacity_semaphore = asyncio.Semaphore(self.max_concurrent_requests)
144+
# Store the original mesh name before any modifications
145+
self._base_mesh_name = self.proc_config.mesh_name
141146

142147
# Initialization related functionalities
143148

@@ -158,11 +163,13 @@ async def initialize(self):
158163
logger.debug(f"Launching actor for replica {self.idx}")
159164

160165
# If a Mesh name was specified, incorporate this info.
161-
if self.proc_config.mesh_name:
162-
mesh_name_with_replica = f"{self.proc_config.mesh_name}_{self.idx}"
166+
# Use the stored base name to avoid appending _idx multiple times during recovery
167+
if self._base_mesh_name and not self._mesh_name_initialized:
168+
mesh_name_with_replica = f"{self._base_mesh_name}_{self.idx}"
163169
self.proc_config.mesh_name = mesh_name_with_replica
164170
if hasattr(self.actor_def, "mesh_name"):
165171
self.actor_def.mesh_name = mesh_name_with_replica
172+
self._mesh_name_initialized = True
166173

167174
self.actor = await self.actor_def.launch(
168175
*self.actor_args,

0 commit comments

Comments
 (0)