Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/solid-sides-look.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"trackio": patch
---

feat:Remove pydub dependency
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor formatting: add a space after feat: so the changeset summary reads cleanly (e.g., feat: Remove pydub dependency).

Suggested change
feat:Remove pydub dependency
feat: Remove pydub dependency

Copilot uses AI. Check for mistakes.
9 changes: 1 addition & 8 deletions examples/crash-and-resume-same-run-name.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,8 @@
import argparse
import math
import uuid
import warnings

warnings.filterwarnings(
"ignore",
category=SyntaxWarning,
module=r"pydub\.utils",
)

import trackio # noqa: E402
import trackio

DEFAULT_PROJECT = f"crash-and-resume-demo-{uuid.uuid4().hex[:8]}"
DEFAULT_RUN_NAME = "trainer-job-42"
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ dependencies = [
"numpy<3.0.0",
"pillow<13.0.0",
"orjson>=3.0,<4.0.0",
"pydub<1.0.0",
"tomli>=2.0.0; python_version < '3.11'",
]
classifiers = [
Expand Down
38 changes: 32 additions & 6 deletions tests/unit/test_audio_writer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import math
import shutil
import subprocess
import wave
from pathlib import Path

Expand Down Expand Up @@ -34,6 +36,29 @@ def _has_ffmpeg() -> bool:
return shutil.which("ffmpeg") is not None


def _has_ffprobe() -> bool:
    """Return True when an ``ffprobe`` executable can be located on PATH."""
    return shutil.which("ffprobe") is not None


def _probe_audio(path: Path) -> tuple[int, int]:
    """Return ``(sample_rate, channels)`` for the first audio stream of *path*.

    Invokes ``ffprobe`` restricted to stream ``a:0`` and asks for only the
    ``sample_rate`` and ``channels`` entries, emitted as JSON.

    Raises:
        subprocess.CalledProcessError: if ``ffprobe`` exits with a non-zero
            status.
        KeyError, IndexError: if the JSON output carries no stream entry
            (presumably when the file has no audio stream -- confirm against
            ffprobe's output for such files).
    """
    out = subprocess.check_output(
        [
            "ffprobe",
            # Only report errors so stdout stays pure JSON.
            "-v",
            "error",
            "-select_streams",
            "a:0",
            "-show_entries",
            "stream=sample_rate,channels",
            "-of",
            "json",
            str(path),
        ]
    )
    stream = json.loads(out)["streams"][0]
    # Values are passed through int() so callers can compare against ints.
    return int(stream["sample_rate"]), int(stream["channels"])


@pytest.mark.parametrize("channels", [1, 2])
def test_write_wav_mono_and_stereo_with_float_normalization(
tmp_path: Path, channels: int
Expand All @@ -56,11 +81,11 @@ def test_write_wav_mono_and_stereo_with_float_normalization(
assert 32000 <= max_abs <= 32767


@pytest.mark.skipif(not _has_ffmpeg(), reason="ffmpeg not available")
@pytest.mark.skipif(
not (_has_ffmpeg() and _has_ffprobe()), reason="ffmpeg/ffprobe not available"
)
@pytest.mark.parametrize("channels", [1, 2])
def test_write_mp3_mono_and_stereo(tmp_path: Path, channels: int) -> None:
from pydub import AudioSegment

mono = _tone(0.1, 440.0, SAMPLE_RATE, amp=0.5)
data = mono if channels == 1 else np.stack([mono, mono], axis=1)

Expand All @@ -69,9 +94,10 @@ def test_write_mp3_mono_and_stereo(tmp_path: Path, channels: int) -> None:
data=data, sample_rate=SAMPLE_RATE, filename=out, format="mp3"
)

seg = AudioSegment.from_file(str(out), format="mp3")
assert seg.frame_rate == SAMPLE_RATE
assert seg.channels == channels
assert out.exists() and out.stat().st_size > 0
sr, ch = _probe_audio(out)
assert sr == SAMPLE_RATE
assert ch == channels


@pytest.mark.parametrize(
Expand Down
42 changes: 32 additions & 10 deletions trackio/media/audio.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
import shutil
import subprocess
import warnings
import wave
from pathlib import Path
from typing import Literal

import numpy as np
from pydub import AudioSegment

from trackio.media.media import TrackioMedia
from trackio.media.utils import check_ffmpeg_installed, check_path
Expand Down Expand Up @@ -156,12 +157,33 @@ def write_audio(
check_ffmpeg_installed()

channels = 1 if pcm.ndim == 1 else pcm.shape[1]
audio = AudioSegment(
pcm.tobytes(),
frame_rate=sample_rate,
sample_width=2, # int16
channels=channels,
)

file = audio.export(str(filename), format=format)
file.close()
pcm_bytes = pcm.tobytes()

if format == "wav":
with wave.open(str(filename), "wb") as wf:
wf.setnchannels(channels)
wf.setsampwidth(2)
wf.setframerate(sample_rate)
wf.writeframes(pcm_bytes)
else:
cmd = [
"ffmpeg",
"-y",
"-loglevel",
"error",
"-f",
"s16le",
"-ar",
str(sample_rate),
"-ac",
str(channels),
"-i",
"pipe:0",
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ffmpeg invocation relies on the output filename’s extension to select the output container/codec. That changes behavior vs the previous pydub AudioSegment.export(..., format=format) path: if format="mp3" but the filename has an unknown/mismatched extension, ffmpeg will error or encode to the extension’s format. Consider explicitly specifying the output format/codec (e.g., -f mp3 and/or -codec:a libmp3lame) so the format argument always controls the encoding regardless of filename.

Suggested change
"pipe:0",
"pipe:0",
"-f",
format,
"-codec:a",
"libmp3lame",

Copilot uses AI. Check for mistakes.
str(filename),
]
proc = subprocess.run(
cmd, input=pcm_bytes, capture_output=True, check=False
)
if proc.returncode != 0:
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
raise RuntimeError(f"ffmpeg failed to encode {format} audio: {stderr}")
2 changes: 1 addition & 1 deletion trackio/media/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def check_ffmpeg_installed() -> None:
"""Raise an error if ffmpeg is not available on the system PATH."""
if shutil.which("ffmpeg") is None:
raise RuntimeError(
"ffmpeg is required to write video but was not found on your system. "
"ffmpeg is required to write this media format but was not found on your system. "
"Please install ffmpeg and ensure it is available on your PATH."
)

Expand Down
Loading