Skip to content

Commit 7950908

Browse files
authored
handle printing even if sys.stdout.buffer is not available (openai#887)
1 parent 1f9b52a commit 7950908

File tree

2 files changed: +16 -8 lines changed

2 files changed: +16 -8 lines changed

whisper/transcribe.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
import argparse
22
import os
3-
import sys
43
import warnings
5-
from typing import List, Optional, Tuple, Union, TYPE_CHECKING
4+
from typing import Optional, Tuple, Union, TYPE_CHECKING
65

76
import numpy as np
87
import torch
@@ -11,7 +10,7 @@
1110
from .audio import SAMPLE_RATE, N_FRAMES, HOP_LENGTH, pad_or_trim, log_mel_spectrogram
1211
from .decoding import DecodingOptions, DecodingResult
1312
from .tokenizer import LANGUAGES, TO_LANGUAGE_CODE, get_tokenizer
14-
from .utils import exact_div, format_timestamp, optional_int, optional_float, str2bool, get_writer
13+
from .utils import exact_div, format_timestamp, make_safe, optional_int, optional_float, str2bool, get_writer
1514

1615
if TYPE_CHECKING:
1716
from .model import Whisper
@@ -166,11 +165,7 @@ def add_segment(
166165
}
167166
)
168167
if verbose:
169-
line = f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}\n"
170-
# compared to just `print(line)`, this replaces any character not representable using
171-
# the system default encoding with an '?', avoiding UnicodeEncodeError.
172-
sys.stdout.buffer.write(line.encode(sys.getdefaultencoding(), errors="replace"))
173-
sys.stdout.flush()
168+
print(make_safe(f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}"))
174169

175170
# show the progress bar when verbose is False (otherwise the transcribed text will be printed)
176171
num_frames = mel.shape[-1]

whisper/utils.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,21 @@
11
import json
22
import os
3+
import sys
34
import zlib
45
from typing import Callable, TextIO
56

7+
system_encoding = sys.getdefaultencoding()
8+
9+
if system_encoding != "utf-8":
10+
def make_safe(string):
11+
# replaces any character not representable using the system default encoding with an '?',
12+
# avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729).
13+
return string.encode(system_encoding, errors="replace").decode(system_encoding)
14+
else:
15+
def make_safe(string):
16+
# utf-8 can encode any Unicode code point, so no need to do the round-trip encoding
17+
return string
18+
619

720
def exact_div(x, y):
821
assert x % y == 0

0 commit comments

Comments
 (0)