|
1 | 1 | import argparse |
2 | 2 | import os |
3 | | -import sys |
4 | 3 | import warnings |
5 | | -from typing import List, Optional, Tuple, Union, TYPE_CHECKING |
| 4 | +from typing import Optional, Tuple, Union, TYPE_CHECKING |
6 | 5 |
|
7 | 6 | import numpy as np |
8 | 7 | import torch |
|
11 | 10 | from .audio import SAMPLE_RATE, N_FRAMES, HOP_LENGTH, pad_or_trim, log_mel_spectrogram |
12 | 11 | from .decoding import DecodingOptions, DecodingResult |
13 | 12 | from .tokenizer import LANGUAGES, TO_LANGUAGE_CODE, get_tokenizer |
14 | | -from .utils import exact_div, format_timestamp, optional_int, optional_float, str2bool, get_writer |
| 13 | +from .utils import exact_div, format_timestamp, make_safe, optional_int, optional_float, str2bool, get_writer |
15 | 14 |
|
16 | 15 | if TYPE_CHECKING: |
17 | 16 | from .model import Whisper |
@@ -166,11 +165,7 @@ def add_segment( |
166 | 165 | } |
167 | 166 | ) |
168 | 167 | if verbose: |
169 | | - line = f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}\n" |
170 | | - # compared to just `print(line)`, this replaces any character not representable using |
171 | | - # the system default encoding with an '?', avoiding UnicodeEncodeError. |
172 | | - sys.stdout.buffer.write(line.encode(sys.getdefaultencoding(), errors="replace")) |
173 | | - sys.stdout.flush() |
| 168 | + print(make_safe(f"[{format_timestamp(start)} --> {format_timestamp(end)}] {text}")) |
174 | 169 |
|
175 | 170 | # show the progress bar when verbose is False (otherwise the transcribed text will be printed) |
176 | 171 | num_frames = mel.shape[-1] |
|
0 commit comments