Skip to content

Commit a2bccf4

Browse files
EpsilonFOlebaudantoine
authored andcommitted
🐛(backend) make start-recording atomic and fault-tolerant
Wrap Recording and RecordingAccess creation in a single transaction so a partial failure does not leave orphan rows, and return 409 instead of 500 when a recording is already in progress for the room. When the worker fails to start, transition the Recording to FAILED_TO_START so the unique partial constraint on (room, status) no longer blocks future recording attempts on the same room.
1 parent 6830250 commit a2bccf4

3 files changed

Lines changed: 97 additions & 12 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ and this project adheres to
1515
### Fixed
1616

1717
- ♻(frontend) standardize role terminology across localizations
18+
- 🐛(backend) make start-recording atomic and fault-tolerant
1819

1920
## [1.15.0] - 2026-04-30
2021

src/backend/core/api/viewsets.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
from urllib.parse import unquote, urlparse
77

88
from django.conf import settings
9+
from django.core.exceptions import ValidationError as DjangoValidationError
910
from django.core.files.storage import default_storage
11+
from django.db import IntegrityError, transaction
1012
from django.db.models import Q
1113
from django.http import Http404
1214
from django.shortcuts import get_object_or_404
@@ -320,26 +322,40 @@ def start_room_recording(self, request, pk=None): # pylint: disable=unused-argu
320322
options = serializer.validated_data.get("options")
321323
room = self.get_object()
322324

323-
# May raise exception if an active or initiated recording already exist for the room
324-
recording = models.Recording.objects.create(
325-
room=room,
326-
mode=mode,
327-
options=options.model_dump(exclude_none=True) if options else {},
328-
)
325+
try:
326+
with transaction.atomic():
327+
recording = models.Recording.objects.create(
328+
room=room,
329+
mode=mode,
330+
options=options.model_dump(exclude_none=True) if options else {},
331+
)
332+
models.RecordingAccess.objects.create(
333+
user=self.request.user,
334+
role=models.RoleChoices.OWNER,
335+
recording=recording,
336+
)
329337

330-
models.RecordingAccess.objects.create(
331-
user=self.request.user, role=models.RoleChoices.OWNER, recording=recording
332-
)
338+
except (DjangoValidationError, IntegrityError):
339+
# DjangoValidationError covers the Python-level check (full_clean);
340+
# IntegrityError covers the race where two concurrent requests both
341+
# pass that check and the DB-level UNIQUE constraint catches the loser.
342+
return drf_response.Response(
343+
{"error": f"A recording is already in progress for room {room.slug}"},
344+
status=drf_status.HTTP_409_CONFLICT,
345+
)
333346

334347
worker_service = get_worker_service(mode=recording.mode)
335348
worker_manager = WorkerServiceMediator(worker_service=worker_service)
336349

337350
try:
338351
worker_manager.start(recording)
339352
except RecordingStartError:
353+
models.Recording.objects.filter(pk=recording.pk).update(
354+
status=models.RecordingStatusChoices.FAILED_TO_START
355+
)
340356
return drf_response.Response(
341357
{"error": f"Recording failed to start for room {room.slug}"},
342-
status=drf_status.HTTP_500_INTERNAL_SERVER_ERROR,
358+
status=drf_status.HTTP_502_BAD_GATEWAY,
343359
)
344360

345361
if settings.METADATA_COLLECTOR_ENABLED and (

src/backend/core/tests/rooms/test_api_rooms_start_recording.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,16 +140,18 @@ def test_start_recording_worker_error(
140140

141141
mock_worker_service_factory.assert_called_once_with(mode="screen_recording")
142142

143-
assert response.status_code == 500
143+
assert response.status_code == 502
144144
assert response.json() == {
145145
"error": f"Recording failed to start for room {room.slug}"
146146
}
147147

148-
# Recording object should be created even if worker fails
148+
# Recording object should be created even if worker fails, and moved out
149+
# of the unique-constraint window so the room is not locked.
149150
assert Recording.objects.count() == 1
150151
recording = Recording.objects.first()
151152
assert recording.room == room
152153
assert recording.mode == "screen_recording"
154+
assert recording.status == "failed_to_start"
153155

154156
# Verify recording access details
155157
assert recording.accesses.count() == 1
@@ -158,6 +160,72 @@ def test_start_recording_worker_error(
158160
assert access.role == "owner"
159161

160162

163+
@pytest.mark.parametrize(
164+
"status",
165+
["active", "initiated"],
166+
)
167+
def test_start_recording_conflict_when_already_in_progress(
168+
status, mock_worker_service_factory, mock_worker_manager, settings
169+
):
170+
"""Should return 409 when a second start is attempted while a recording is already active."""
171+
settings.RECORDING_ENABLE = True
172+
173+
room = RoomFactory()
174+
user = UserFactory()
175+
room.accesses.create(user=user, role="owner")
176+
177+
# Pre-existing active recording for the same room.
178+
Recording.objects.create(room=room, mode="screen_recording", status="active")
179+
180+
client = APIClient()
181+
client.force_login(user)
182+
183+
response = client.post(
184+
f"/api/v1.0/rooms/{room.id}/start-recording/",
185+
{"mode": "screen_recording"},
186+
)
187+
188+
assert response.status_code == 409
189+
assert response.json() == {
190+
"error": f"A recording is already in progress for room {room.slug}"
191+
}
192+
# No new recording row, no access row leaked from the rolled-back transaction.
193+
assert Recording.objects.count() == 1
194+
assert Recording.objects.first().accesses.count() == 0
195+
mock_worker_manager.start.assert_not_called()
196+
197+
198+
def test_start_recording_after_worker_failure_unblocks_room(
199+
mock_worker_service_factory, mock_worker_manager, settings
200+
):
201+
"""Should allow a new recording when the previous recording failed."""
202+
settings.RECORDING_ENABLE = True
203+
204+
room = RoomFactory()
205+
user = UserFactory()
206+
room.accesses.create(user=user, role="owner")
207+
208+
mock_worker_manager.start = mock.Mock(
209+
side_effect=[RecordingStartError("boom"), None]
210+
)
211+
212+
client = APIClient()
213+
client.force_login(user)
214+
215+
first = client.post(
216+
f"/api/v1.0/rooms/{room.id}/start-recording/",
217+
{"mode": "screen_recording"},
218+
)
219+
assert first.status_code == 502
220+
221+
second = client.post(
222+
f"/api/v1.0/rooms/{room.id}/start-recording/",
223+
{"mode": "screen_recording"},
224+
)
225+
assert second.status_code == 201
226+
assert Recording.objects.count() == 2
227+
228+
161229
def test_start_recording_success(
162230
mock_worker_service_factory, mock_worker_manager, settings
163231
):

0 commit comments

Comments
 (0)