Skip to content

Commit 42ca9bf

Browse files
committed
fix: make replaygain use second pass stats
1 parent 4b00c76 commit 42ca9bf

2 files changed

Lines changed: 91 additions & 38 deletions

File tree

ffmpeg_normalize/_media_file.py

Lines changed: 82 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import shlex
77
from shutil import move, rmtree
88
from tempfile import mkdtemp
9-
from typing import TYPE_CHECKING, Iterable, Iterator, Literal, TypedDict
9+
from typing import TYPE_CHECKING, Iterable, Iterator, Literal, TypedDict, Union
1010

1111
from mutagen.id3 import ID3, TXXX
1212
from mutagen.mp3 import MP3
@@ -75,10 +75,17 @@ def __init__(
7575
current_ext = os.path.splitext(output_file)[1][1:]
7676
# we need to check if it's empty, e.g. /dev/null or NUL
7777
if current_ext == "" or self.output_file == os.devnull:
78+
_logger.debug(
79+
f"Current extension is unset, or output file is a null device, using extension: {self.ffmpeg_normalize.extension}"
80+
)
7881
self.output_ext = self.ffmpeg_normalize.extension
7982
else:
83+
_logger.debug(
84+
f"Current extension is set from output file, using extension: {current_ext}"
85+
)
8086
self.output_ext = current_ext
8187
self.streams: StreamDict = {"audio": {}, "video": {}, "subtitle": {}}
88+
self.temp_file: Union[str, None] = None
8289

8390
self.parse_streams()
8491

@@ -203,12 +210,17 @@ def run_normalization(self) -> None:
203210
# run the first pass to get loudness stats
204211
self._first_pass()
205212

206-
# shortcut to apply replaygain
213+
# for second pass, create a temp file
214+
temp_dir = mkdtemp()
215+
self.temp_file = os.path.join(temp_dir, f"out.{self.output_ext}")
216+
207217
if self.ffmpeg_normalize.replaygain:
208-
self._run_replaygain()
209-
return
218+
_logger.debug(
219+
"ReplayGain mode: Second pass will run with temporary file to get stats."
220+
)
221+
self.output_file = self.temp_file
210222

211-
# run the second pass as a whole
223+
# run the second pass as a whole.
212224
if self.ffmpeg_normalize.progress:
213225
with tqdm(
214226
total=100,
@@ -222,7 +234,20 @@ def run_normalization(self) -> None:
222234
for _ in self._second_pass():
223235
pass
224236

225-
_logger.info(f"Normalized file written to {self.output_file}")
237+
# remove temp dir; this will remove the temp file as well if it has not been renamed (e.g. for replaygain)
238+
if os.path.exists(temp_dir):
239+
rmtree(temp_dir, ignore_errors=True)
240+
241+
# This will use stats from ebu_pass2 if available (from the main second pass),
242+
# or fall back to ebu_pass1.
243+
if self.ffmpeg_normalize.replaygain:
244+
_logger.debug(
245+
"ReplayGain tagging is enabled. Proceeding with tag calculation/application."
246+
)
247+
self._run_replaygain()
248+
249+
if not self.ffmpeg_normalize.replaygain:
250+
_logger.info(f"Normalized file written to {self.output_file}")
226251

227252
def _run_replaygain(self) -> None:
228253
"""
@@ -233,13 +258,32 @@ def _run_replaygain(self) -> None:
233258
# get the audio streams
234259
audio_streams = list(self.streams["audio"].values())
235260

236-
# get the loudnorm stats from the first pass
237-
loudnorm_stats = audio_streams[0].loudness_statistics["ebu_pass1"]
261+
# Attempt to use EBU pass 2 statistics, which account for pre-filters.
262+
# These are populated by the main second pass if it runs (not a dry run)
263+
# and normalization_type is 'ebu'.
264+
loudness_stats_source = "ebu_pass2"
265+
loudnorm_stats = audio_streams[0].loudness_statistics.get("ebu_pass2")
266+
267+
if loudnorm_stats is None:
268+
_logger.warning(
269+
"ReplayGain: Second pass EBU statistics (ebu_pass2) not found. "
270+
"Falling back to first pass EBU statistics (ebu_pass1). "
271+
"This may not account for pre-filters if any are used."
272+
)
273+
loudness_stats_source = "ebu_pass1"
274+
loudnorm_stats = audio_streams[0].loudness_statistics.get("ebu_pass1")
238275

239276
if loudnorm_stats is None:
240-
_logger.error("no loudnorm stats available in first pass stats!")
277+
_logger.error(
278+
f"ReplayGain: No loudness statistics available from {loudness_stats_source} (and fallback) for stream 0. "
279+
"Cannot calculate ReplayGain tags."
280+
)
241281
return
242282

283+
_logger.debug(
284+
f"Using statistics from {loudness_stats_source} for ReplayGain calculation."
285+
)
286+
243287
# apply the replaygain tag from the first audio stream (to all audio streams)
244288
if len(audio_streams) > 1:
245289
_logger.warning(
@@ -249,23 +293,31 @@ def _run_replaygain(self) -> None:
249293
)
250294

251295
target_level = self.ffmpeg_normalize.target_level
252-
input_i = loudnorm_stats["input_i"] # Integrated loudness
253-
input_tp = loudnorm_stats["input_tp"] # True peak
296+
# Use 'input_i' and 'input_tp' from the chosen stats.
297+
# For ebu_pass2, these are measurements *after* pre-filter but *before* loudnorm adjustment.
298+
input_i = loudnorm_stats.get("input_i")
299+
input_tp = loudnorm_stats.get("input_tp")
254300

255301
if input_i is None or input_tp is None:
256-
_logger.error("no input_i or input_tp available in first pass stats!")
302+
_logger.error(
303+
f"ReplayGain: 'input_i' or 'input_tp' missing from {loudness_stats_source} statistics. "
304+
"Cannot calculate ReplayGain tags."
305+
)
257306
return
258307

259308
track_gain = -(input_i - target_level) # dB
260309
track_peak = 10 ** (input_tp / 20) # linear scale
261310

262-
_logger.debug(f"Track gain: {track_gain} dB")
263-
_logger.debug(f"Track peak: {track_peak}")
311+
_logger.debug(f"Calculated Track gain: {track_gain:.2f} dB")
312+
_logger.debug(f"Calculated Track peak: {track_peak:.2f}")
264313

265-
if not self.ffmpeg_normalize.dry_run:
314+
if not self.ffmpeg_normalize.dry_run: # This uses the overall dry_run state
266315
self._write_replaygain_tags(track_gain, track_peak)
267316
else:
268-
_logger.warning("Dry run used, not actually writing replaygain tags")
317+
_logger.warning(
318+
"Overall dry_run is enabled, not actually writing ReplayGain tags to the file. "
319+
"Tag calculation based on available stats was performed."
320+
)
269321

270322
def _write_replaygain_tags(self, track_gain: float, track_peak: float) -> None:
271323
"""
@@ -554,33 +606,27 @@ def _second_pass(self) -> Iterator[float]:
554606

555607
cmd_runner = CommandRunner()
556608
try:
557-
try:
558-
yield from cmd_runner.run_ffmpeg_command(cmd)
559-
except Exception as e:
560-
_logger.error(
561-
f"Error while running command {shlex.join(cmd)}! Error: {e}"
562-
)
563-
raise e
564-
else:
565-
if self.output_file != os.devnull:
566-
_logger.debug(
567-
f"Moving temporary file from {temp_file} to {self.output_file}"
568-
)
569-
move(temp_file, self.output_file)
570-
rmtree(temp_dir, ignore_errors=True)
609+
yield from cmd_runner.run_ffmpeg_command(cmd)
571610
except Exception as e:
572-
if self.output_file != os.devnull:
573-
rmtree(temp_dir, ignore_errors=True)
611+
_logger.error(f"Error while running command {shlex.join(cmd)}! Error: {e}")
574612
raise e
613+
else:
614+
# only move the temp file if the output file is not a null device and ReplayGain is not enabled!
615+
if self.output_file != os.devnull and not self.ffmpeg_normalize.replaygain:
616+
_logger.debug(
617+
f"Moving temporary file from {temp_file} to {self.output_file}"
618+
)
619+
move(temp_file, self.output_file)
575620

576621
output = cmd_runner.get_output()
577622
# in the second pass, we do not normalize stream-by-stream, so we set the stats based on the
578623
# overall output (which includes multiple loudnorm stats)
579624
if self.ffmpeg_normalize.normalization_type == "ebu":
580-
all_stats = AudioStream.prune_and_parse_loudnorm_output(output)
581-
for stream_id, audio_stream in self.streams["audio"].items():
582-
if stream_id in all_stats:
583-
audio_stream.set_second_pass_stats(all_stats[stream_id])
625+
ebu_pass_2_stats = list(
626+
AudioStream.prune_and_parse_loudnorm_output(output).values()
627+
)
628+
for idx, audio_stream in enumerate(self.streams["audio"].values()):
629+
audio_stream.set_second_pass_stats(ebu_pass_2_stats[idx])
584630

585631
# warn if dynamic mode is disabled (self.ffmpeg_normalize.dynamic is False) but any of the second pass stats report "normalization_type" == "dynamic"
586632
if self.ffmpeg_normalize.dynamic is False:

ffmpeg_normalize/_streams.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ def __init__(
6565
self.media_file = media_file
6666
self.stream_type = stream_type
6767
self.stream_id = stream_id
68+
_logger.debug(
69+
f"Created MediaStream for {self.media_file.input_file}, {self.stream_type} stream {self.stream_id}"
70+
)
6871

6972
def __repr__(self) -> str:
7073
return (
@@ -175,6 +178,9 @@ def set_second_pass_stats(self, stats: EbuLoudnessStatistics) -> None:
175178
Args:
176179
stats (dict): The EBU loudness statistics.
177180
"""
181+
_logger.debug(
182+
f"Setting second pass stats for stream {self.stream_id} from {stats}"
183+
)
178184
self.loudness_statistics["ebu_pass2"] = stats
179185

180186
def get_pcm_codec(self) -> str:
@@ -339,8 +345,9 @@ def prune_and_parse_loudnorm_output(
339345
output (str): The output from ffmpeg.
340346
341347
Returns:
342-
list: The EBU loudness statistics.
348+
dict[int, EbuLoudnessStatistics]: The EBU loudness statistics, keyed by stream index.
343349
"""
350+
_logger.debug("Parsing loudnorm stats from output")
344351
pruned_output = CommandRunner.prune_ffmpeg_progress_from_output(output)
345352
output_lines = [line.strip() for line in pruned_output.split("\n")]
346353
return AudioStream._parse_loudnorm_output(output_lines)
@@ -359,7 +366,7 @@ def _parse_loudnorm_output(
359366
FFmpegNormalizeError: When the output could not be parsed.
360367
361368
Returns:
362-
EbuLoudnessStatistics: The EBU loudness statistics, if found.
369+
dict[int, EbuLoudnessStatistics]: Mapping of stream index to its EBU loudness statistics, if found.
363370
"""
364371
result = dict[int, EbuLoudnessStatistics]()
365372
stream_index = -1

0 commit comments

Comments
 (0)