Skip to content

Commit e3687bd

Browse files
committed
feat: better file validation
1 parent a32d644 commit e3687bd

5 files changed

Lines changed: 254 additions & 1 deletion

File tree

src/ffmpeg_normalize/__main__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,18 @@ def _split_options(opts: str) -> list[str]:
723723
if not input_files:
724724
error("No input files specified. Use positional arguments or --input-list.")
725725

726+
# Validate all input files upfront before processing
727+
_logger.debug("Validating all input files before processing...")
728+
validation_errors = FFmpegNormalize.validate_input_files(input_files)
729+
if validation_errors:
730+
_logger.error("Validation failed for the following files:")
731+
for err in validation_errors:
732+
_logger.error(f" - {err}")
733+
error(
734+
f"Validation failed for {len(validation_errors)} file(s). "
735+
"Please fix the issues above and try again."
736+
)
737+
726738
for index, input_file in enumerate(input_files):
727739
if cli_args.output is not None and index < len(cli_args.output):
728740
if cli_args.output_folder and cli_args.output_folder != "normalized":

src/ffmpeg_normalize/_cmd_utils.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,76 @@ def ffmpeg_has_loudnorm() -> bool:
190190
"Please make sure you are running ffmpeg v4.2 or above."
191191
)
192192
return supports_loudnorm
193+
194+
195+
def validate_input_file(input_file: str) -> tuple[bool, str | None]:
196+
"""
197+
Validate that an input file exists, is readable, and contains audio streams.
198+
199+
This function performs a lightweight probe of the file using ffmpeg to check
200+
if it can be read and contains at least one audio stream.
201+
202+
Args:
203+
input_file: Path to the input file to validate
204+
205+
Returns:
206+
tuple: (is_valid, error_message)
207+
- is_valid: True if the file is valid, False otherwise
208+
- error_message: None if valid, otherwise a descriptive error message
209+
"""
210+
# Check if file exists
211+
if not os.path.exists(input_file):
212+
return False, f"File does not exist: {input_file}"
213+
214+
# Check if it's actually a file (not a directory)
215+
if not os.path.isfile(input_file):
216+
return False, f"Path is not a file: {input_file}"
217+
218+
# Check if file is readable
219+
if not os.access(input_file, os.R_OK):
220+
return False, f"File is not readable (permission denied): {input_file}"
221+
222+
# Check if file has audio streams using ffmpeg probe
223+
ffmpeg_exe = get_ffmpeg_exe()
224+
cmd = [
225+
ffmpeg_exe,
226+
"-i",
227+
input_file,
228+
"-c",
229+
"copy",
230+
"-t",
231+
"0",
232+
"-map",
233+
"0",
234+
"-f",
235+
"null",
236+
os.devnull,
237+
]
238+
239+
try:
240+
output = CommandRunner().run_command(cmd).get_output()
241+
except RuntimeError as e:
242+
error_str = str(e)
243+
# Extract a cleaner error message from ffmpeg output
244+
if "Invalid data found" in error_str:
245+
return False, f"Invalid or corrupted media file: {input_file}"
246+
if "No such file or directory" in error_str:
247+
return False, f"File not found or cannot be opened: {input_file}"
248+
if "Permission denied" in error_str:
249+
return False, f"Permission denied when reading file: {input_file}"
250+
if "does not contain any stream" in error_str:
251+
return False, f"File contains no media streams: {input_file}"
252+
# Generic error for other ffmpeg failures
253+
return False, f"Cannot read media file: {input_file}"
254+
255+
# Check for audio streams in the output
256+
has_audio = False
257+
for line in output.split("\n"):
258+
if line.strip().startswith("Stream") and "Audio" in line:
259+
has_audio = True
260+
break
261+
262+
if not has_audio:
263+
return False, f"File does not contain any audio streams: {input_file}"
264+
265+
return True, None

src/ffmpeg_normalize/_ffmpeg_normalize.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from tqdm import tqdm
1111

12-
from ._cmd_utils import ffmpeg_has_loudnorm, get_ffmpeg_exe
12+
from ._cmd_utils import ffmpeg_has_loudnorm, get_ffmpeg_exe, validate_input_file
1313
from ._errors import FFmpegNormalizeError
1414
from ._media_file import MediaFile
1515

@@ -317,6 +317,28 @@ def add_media_file(self, input_file: str, output_file: str) -> None:
317317
self.media_files.append(MediaFile(self, input_file, output_file))
318318
self.file_count += 1
319319

320+
@staticmethod
def validate_input_files(input_files: list[str]) -> list[str]:
    """
    Validate all input files before processing.

    Every file is passed to ``validate_input_file`` and all failures are
    collected, rather than stopping at the first one, so that users can fix
    all issues before rerunning the batch.

    Args:
        input_files: List of input file paths to validate

    Returns:
        list: One error message per invalid file, in input order. Empty if
        all files are valid.
    """
    errors: list[str] = []
    for input_file in input_files:
        is_valid, error_msg = validate_input_file(input_file)
        if is_valid:
            continue
        # A failed file must always be reported: fall back to a generic
        # message if the validator returned no (or an empty) description,
        # instead of silently treating the file as valid.
        errors.append(error_msg or f"Invalid input file: {input_file}")
    return errors
341+
320342
def _calculate_batch_reference(self) -> float | None:
321343
"""
322344
Calculate the batch reference loudness by averaging measurements across all files.

tests/test_all.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,3 +599,62 @@ def test_keep_other_and_keep_original_conflict(self):
599599
]
600600
)
601601
assert "Cannot use both" in stderr
602+
603+
604+
class TestFileValidation:
    """Tests for pre-batch file validation through the command-line entry point."""

    def test_nonexistent_file(self):
        """Test that validation fails for non-existent files."""
        _, stderr = ffmpeg_normalize_call(["nonexistent_file.mp4"])
        assert "Validation failed" in stderr
        assert "does not exist" in stderr

    def test_multiple_invalid_files(self):
        """Test that validation reports all invalid files at once."""
        _, stderr = ffmpeg_normalize_call(
            ["nonexistent1.mp4", "nonexistent2.mp4", "nonexistent3.mp4"]
        )
        assert "Validation failed for 3 file(s)" in stderr
        assert "nonexistent1.mp4" in stderr
        assert "nonexistent2.mp4" in stderr
        assert "nonexistent3.mp4" in stderr

    def test_file_without_audio(self, tmp_path):
        """Test that validation fails for files without audio streams."""
        # Create a video-only file using ffmpeg
        video_only = tmp_path / "video_only.mp4"
        cmd = [
            "ffmpeg",
            "-y",
            "-f",
            "lavfi",
            "-i",
            "color=c=blue:s=320x240:d=1",
            "-an",  # no audio
            "-c:v",
            "libx264",
            str(video_only),
        ]
        subprocess.run(cmd, check=True, capture_output=True)

        _, stderr = ffmpeg_normalize_call([str(video_only)])
        assert "Validation failed" in stderr
        assert "does not contain any audio streams" in stderr

    def test_mixed_valid_invalid_files(self):
        """Test that only the invalid file is reported in a mixed batch."""
        _, stderr = ffmpeg_normalize_call(
            ["tests/test.mp4", "nonexistent.mp4", "tests/test.m4a"]
        )
        # Should report the invalid file but not the valid ones
        assert "Validation failed for 1 file(s)" in stderr
        assert "nonexistent.mp4" in stderr
        # Valid files should not appear in error messages
        assert "tests/test.mp4" not in stderr
        assert "tests/test.m4a" not in stderr

    def test_valid_file_passes_validation(self):
        """Test that valid files pass validation."""
        # The call helper hands back stderr instead of raising (the failure
        # tests above rely on that), so success has to be asserted explicitly.
        _, stderr = ffmpeg_normalize_call(["tests/test.mp4", "-n"])  # dry run
        assert "Validation failed" not in stderr

tests/test_api_ground_truth.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
Run with: pytest -m "slow" to run only the slow integration tests
1212
"""
1313

14+
import subprocess
1415
import tempfile
1516
import zipfile
1617
from pathlib import Path
@@ -19,6 +20,7 @@
1920
import pytest
2021

2122
from ffmpeg_normalize import FFmpegNormalize
23+
from ffmpeg_normalize._cmd_utils import validate_input_file
2224
from ffmpeg_normalize._streams import LoudnessStatisticsWithMetadata
2325

2426

@@ -640,3 +642,88 @@ def test_batch_mode_preserves_relative_loudness(self, temp_output_dir):
640642
f"Normal file should remain louder than quiet file in batch mode. "
641643
f"Got normal={loudness_values[1]:.2f}, quiet={loudness_values[2]:.2f}"
642644
)
645+
646+
647+
class TestFileValidationAPI:
    """Exercise the validation helpers directly, bypassing the CLI."""

    @staticmethod
    def _make_video_only_file(directory):
        """Render a one-second, audio-less clip into *directory* and return its path."""
        target = directory / "video_only.mp4"
        subprocess.run(
            [
                "ffmpeg",
                "-y",
                "-f",
                "lavfi",
                "-i",
                "color=c=blue:s=320x240:d=1",
                "-an",  # no audio
                "-c:v",
                "libx264",
                str(target),
            ],
            check=True,
            capture_output=True,
        )
        return target

    def test_validate_input_file_exists(self):
        """A bundled sample with audio validates cleanly."""
        sample = Path(__file__).parent / "test.mp4"
        ok, message = validate_input_file(str(sample))
        assert ok is True
        assert message is None

    def test_validate_input_file_not_exists(self):
        """A missing path is rejected with a 'does not exist' message."""
        ok, message = validate_input_file("nonexistent_file.mp4")
        assert ok is False
        assert message is not None
        assert "does not exist" in message

    def test_validate_input_file_directory(self, tmp_path):
        """A directory is rejected with a 'not a file' message."""
        ok, message = validate_input_file(str(tmp_path))
        assert ok is False
        assert message is not None
        assert "not a file" in message

    def test_validate_input_file_no_audio(self, tmp_path):
        """A video-only file is rejected with a message mentioning audio."""
        clip = self._make_video_only_file(tmp_path)

        ok, message = validate_input_file(str(clip))
        assert ok is False
        assert message is not None
        assert "audio" in message.lower()

    def test_validate_input_files_batch(self):
        """Batch validation of only-valid files yields no errors."""
        here = Path(__file__).parent
        samples = [str(here / name) for name in ("test.mp4", "test.m4a")]

        problems = FFmpegNormalize.validate_input_files(samples)
        assert len(problems) == 0

    def test_validate_input_files_batch_mixed(self):
        """Batch validation reports exactly the invalid entries of a mixed list."""
        here = Path(__file__).parent
        candidates = [
            str(here / "test.mp4"),
            "nonexistent1.mp4",
            str(here / "test.m4a"),
            "nonexistent2.mp4",
        ]

        problems = FFmpegNormalize.validate_input_files(candidates)
        assert len(problems) == 2
        for missing in ("nonexistent1.mp4", "nonexistent2.mp4"):
            assert any(missing in problem for problem in problems)

    def test_validate_input_files_all_invalid(self):
        """Batch validation yields one error per file when every file is invalid."""
        candidates = [
            "nonexistent1.mp4",
            "nonexistent2.mp4",
            "nonexistent3.mp4",
        ]

        problems = FFmpegNormalize.validate_input_files(candidates)
        assert len(problems) == 3

0 commit comments

Comments
 (0)