Skip to content

Commit 9d646db

Browse files
authored
print '?' if a letter can't be encoded using the system default encoding (#859)
1 parent 37a4f1b commit 9d646db

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

whisper/transcribe.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import argparse
22
import os
3+
import sys
34
import warnings
45
from typing import List, Optional, Tuple, Union, TYPE_CHECKING
56

@@ -165,7 +166,10 @@ def add_segment(
165166
}
166167
)
167168
if verbose:
168-
print(f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}")
169+
line = f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}\n"
170+
# compared to just `print(line)`, this replaces any character not representable using
171+
# the system default encoding with a '?', avoiding UnicodeEncodeError.
172+
sys.stdout.buffer.write(line.encode(sys.getdefaultencoding(), errors="replace"))
169173

170174
# show the progress bar when verbose is False (otherwise the transcribed text will be printed)
171175
num_frames = mel.shape[-1]

0 commit comments

Comments
 (0)