44
55import json
66import time
7+ from datetime import datetime
78from typing import Optional
89
910import openai
2829 PROMPT_USER_PART ,
2930)
3031from summary .core .transcript_formatter import TranscriptFormatter
32+ from summary .core .user_assign import assign_speakers
3133from summary .core .webhook_service import submit_content
3234
3335settings = get_settings ()
@@ -58,7 +60,7 @@ def init_sentry(**_kwargs):
5860file_service = FileService ()
5961
6062
61- def transcribe_audio (task_id , filename , language ):
63+ def transcribe_audio (task_id , recording_filename , language ):
6264 """Transcribe an audio file using WhisperX.
6365
6466 Downloads the audio from MinIO, sends it to WhisperX for transcription,
@@ -75,9 +77,13 @@ def transcribe_audio(task_id, filename, language):
7577
7678 # Transcription
7779 try :
78- with file_service .prepare_audio_file (filename ) as (audio_file , metadata ):
80+ with file_service .prepare_audio_file (recording_filename ) as (
81+ audio_file ,
82+ metadata ,
83+ ):
7984 metadata_manager .track (task_id , {"audio_length" : metadata ["duration" ]})
8085
86+ # Compute language parameter
8187 if language is None :
8288 language = settings .whisperx_default_language
8389 logger .info (
@@ -90,22 +96,25 @@ def transcribe_audio(task_id, filename, language):
9096 language ,
9197 )
9298
99+ # Call remote service for transcription
93100 transcription_start_time = time .time ()
94-
95101 transcription = whisperx_client .audio .transcriptions .create (
96102 model = settings .whisperx_asr_model , file = audio_file , language = language
97103 )
98104
99- transcription_time = round (time .time () - transcription_start_time , 2 )
105+ # Logging
106+ transcription_duration = round (time .time () - transcription_start_time , 2 )
100107 metadata_manager .track (
101108 task_id ,
102- {"transcription_time" : transcription_time },
109+ {"transcription_time" : transcription_duration },
110+ )
111+ logger .info (
112+ "Transcription received in %.2f seconds." , transcription_duration
103113 )
104- logger .info ("Transcription received in %.2f seconds." , transcription_time )
105114 logger .debug ("Transcription: \n %s" , transcription )
106115
107116 except FileServiceException :
108- logger .exception ("Unexpected error for filename : %s" , filename )
117+ logger .exception ("Unexpected error for recording : %s" , recording_filename )
109118 return None
110119
111120 metadata_manager .track_transcription_metadata (task_id , transcription )
@@ -117,8 +126,8 @@ def format_transcript(
117126 context_language ,
118127 language ,
119128 room ,
120- recording_date ,
121- recording_time ,
129+ recording_datetime ,
130+ owner_timezone ,
122131 download_link ,
123132):
124133 """Format a transcription into readable content with a title.
@@ -134,8 +143,8 @@ def format_transcript(
134143 return formatter .format (
135144 transcription ,
136145 room = room ,
137- recording_date = recording_date ,
138- recording_time = recording_time ,
146+ recording_datetime = recording_datetime ,
147+ owner_timezone = owner_timezone ,
139148 download_link = download_link ,
140149 )
141150
@@ -167,16 +176,19 @@ def format_actions(llm_output: dict) -> str:
167176def process_audio_transcribe_summarize_v2 (
168177 self ,
169178 owner_id : str ,
170- filename : str ,
179+ recording_filename : str ,
180+ metadata_filename : str ,
171181 email : str ,
172182 sub : str ,
173183 received_at : float ,
174184 room : Optional [str ],
175- recording_date : Optional [str ],
176- recording_time : Optional [str ],
185+ worker_id : Optional [str ],
186+ owner_timezone : Optional [str ],
177187 language : Optional [str ],
178188 download_link : Optional [str ],
179189 context_language : Optional [str ] = None ,
190+ recording_started_at : Optional [str ] = None ,
191+ recording_ended_at : Optional [str ] = None ,
180192):
181193 """Process an audio file by transcribing it and generating a summary.
182194
@@ -189,16 +201,21 @@ def process_audio_transcribe_summarize_v2(
189201 Args:
190202 self: Celery task instance (passed on with bind=True)
191203 owner_id: Unique identifier of the recording owner.
192- filename: Name of the audio file in MinIO storage.
204+ recording_filename: Name of the audio file in MinIO storage.
205+ metadata_filename: Name of the JSON metadata file read for speaker assignment.
193206 email: Email address of the recording owner.
194207 sub: OIDC subject identifier of the recording owner.
195208 received_at: Unix timestamp when the recording was received.
196209 room: room name where the recording took place.
197- recording_date: Date of the recording (localized display string) .
198- recording_time: Time of the recording (localized display string ).
210+ worker_id: LiveKit egress ID used to fetch the egress JSON from S3 .
211+ owner_timezone: IANA timezone of the recording owner (e.g. "Europe/Paris" ).
199212 language: ISO 639-1 language code for transcription.
200213 download_link: URL to download the original recording.
201214 context_language: ISO 639-1 language code of the meeting summary context text.
215+ recording_started_at: ISO 8601 timestamp of when file recording actually started
216+ (from LiveKit FileInfo.started_at via the egress_ended webhook).
217+ recording_ended_at: ISO 8601 timestamp of when file recording ended
218+ (from LiveKit FileInfo.ended_at via the egress_ended webhook).
202219 """
203220 logger .info (
204221 "Notification received | Owner: %s | Room: %s" ,
@@ -208,17 +225,44 @@ def process_audio_transcribe_summarize_v2(
208225
209226 task_id = self .request .id
210227
211- transcription = transcribe_audio (task_id , filename , language )
228+ # Transcribe the audio
229+ transcription = transcribe_audio (task_id , recording_filename , language )
212230 if transcription is None :
213231 return
214232
233+ # Use recording_started_at / recording_ended_at from the backend
234+ # (sourced from LiveKit FileInfo via the egress_ended webhook).
235+ recording_start_dt = None
236+ recording_end_dt = None
237+ if recording_started_at :
238+ recording_start_dt = datetime .fromisoformat (recording_started_at )
239+ if recording_ended_at :
240+ recording_end_dt = datetime .fromisoformat (recording_ended_at )
241+
242+ # Assign user names from metadata and transcription
243+ try :
244+ metadata = file_service .read_json (metadata_filename )
245+ except Exception :
246+ logger .exception ("Failed to read metadata file: %s" , metadata_filename )
247+ metadata = None
248+ if metadata and recording_start_dt is not None and recording_end_dt is not None :
249+ assignment_result = assign_speakers (
250+ metadata ,
251+ transcription ,
252+ recording_start_dt ,
253+ recording_end_dt ,
254+ overlap_threshold = 0.5 ,
255+ )
256+ transcription = assignment_result .apply ({"segments" : transcription .segments })
257+
258+ # Format output
215259 content , title = format_transcript (
216260 transcription ,
217261 context_language ,
218262 language ,
219263 room ,
220- recording_date ,
221- recording_time ,
264+ recording_started_at ,
265+ owner_timezone ,
222266 download_link ,
223267 )
224268
0 commit comments