Skip to content

Commit 7651237

Browse files
authored
Adds support for audio samples in float64, float16, or uint16 formats (#2545)
* fixed audio * added some unit tests * formatting * formatting
1 parent 5c80b54 commit 7651237

3 files changed

Lines changed: 50 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,23 @@ In the example above, 16 requests could be processed in parallel (for a total in
4848
time of 5 seconds), instead of each request being processed separately (for a total
4949
inference time of 80 seconds).
5050

51-
### Load Event
51+
### Upload Event
5252

53-
`Video`, `Audio`, `Image`, and `File` components now support a `upload` event that is triggered when a user uploads a file into any of these components.
53+
`Video`, `Audio`, `Image`, and `File` components now support an `upload()` event that is triggered when a user uploads a file into any of these components.
54+
55+
Example usage:
56+
57+
```py
58+
import gradio as gr
59+
60+
with gr.Blocks() as demo:
61+
with gr.Row():
62+
input_video = gr.Video()
63+
output_video = gr.Video()
64+
65+
# Clears the output video when an input video is uploaded
66+
input_video.upload(lambda : None, None, output_video)
67+
```
5468

5569

5670
## Bug Fixes:
@@ -79,6 +93,7 @@ No changes to highlight.
7993
* Changes websocket path for Spaces as it is no longer necessary to have a different URL for websocket connections on Spaces by [@abidlabs](https://github.com/abidlabs) in [PR 2528](https://github.com/gradio-app/gradio/pull/2528)
8094
* Clearer error message when events are defined outside of a Blocks scope, and a warning if you
8195
try to use `Series` or `Parallel` with `Blocks` by [@abidlabs](https://github.com/abidlabs) in [PR 2543](https://github.com/gradio-app/gradio/pull/2543)
96+
* Adds support for audio samples that are in `float64`, `float16`, or `uint16` formats by [@abidlabs](https://github.com/abidlabs) in [PR 2545](https://github.com/gradio-app/gradio/pull/2545)
8297

8398

8499
## Contributors Shoutout:

gradio/processing_utils.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -220,32 +220,31 @@ def audio_to_file(sample_rate, data, filename):
220220

221221
def convert_to_16_bit_wav(data):
    """Convert an array of audio samples to 16-bit signed integer format.

    Supported input dtypes are float64, float32, float16, int32, int16,
    uint16 and uint8. A warning naming the source dtype is emitted whenever
    a conversion takes place; int16 input is returned unchanged.

    Parameters:
        data: NumPy array of audio samples.

    Returns:
        A NumPy array of dtype int16 (the input object itself when it is
        already int16).

    Raises:
        ValueError: if ``data.dtype`` is not one of the supported dtypes.
    """
    # Based on: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.write.html
    warning = "Trying to convert audio automatically from {} to 16-bit int format."
    if data.dtype in [np.float64, np.float32, np.float16]:
        warnings.warn(warning.format(data.dtype))
        if data.dtype == np.float16:
            # float16 cannot represent 32767 exactly (it rounds to 32768), so
            # scaling in half precision overflows int16 for full-scale samples.
            data = data.astype(np.float32)
        # Peak-normalize, but skip the division for empty or silent input:
        # an empty array has no max() and a zero peak would produce NaNs.
        peak = np.abs(data).max() if data.size else 0
        if peak > 0:
            data = data / peak
        data = data * 32767
        data = data.astype(np.int16)
    elif data.dtype == np.int32:
        warnings.warn(warning.format(data.dtype))
        # NOTE(review): 65536 (2**16) would be the exact int32 -> int16 scale
        # factor; 65538 is kept to preserve existing output - confirm intent.
        data = data / 65538
        data = data.astype(np.int16)
    elif data.dtype == np.int16:
        pass
    elif data.dtype == np.uint16:
        warnings.warn(warning.format(data.dtype))
        # Widen before shifting the midpoint to zero so the result does not
        # depend on unsigned wraparound.
        data = data.astype(np.int32) - 32768
        data = data.astype(np.int16)
    elif data.dtype == np.uint8:
        warnings.warn(warning.format(data.dtype))
        # Widen first: 257 does not fit in uint8, and NumPy 2 raises
        # OverflowError instead of silently promoting the array.
        data = data.astype(np.int32) * 257 - 32768
        data = data.astype(np.int16)
    else:
        raise ValueError(
            "Audio data cannot be converted automatically from "
            f"{data.dtype} to 16-bit int format."
        )
    return data
250249

251250

test/test_processing_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,26 @@ def test_audio_to_file(self):
9797
self.assertTrue(os.path.exists("test_audio_to_file"))
9898
os.remove("test_audio_to_file")
9999

100+
def test_convert_to_16_bit_wav(self):
    """Check that float64, float32 and int16 samples all convert to int16.

    The reference signal is random low-amplitude int16 noise with two
    near-full-scale samples pinned at the start, so the converter's peak
    normalization has a known amplitude to work from.
    """
    reference = np.random.randint(-100, 100, size=(100), dtype="int16")
    reference[0] = -32767
    reference[1] = 32766

    # Floating-point copies of the signal must round-trip to the reference.
    for float_dtype in ("float64", "float32"):
        converted = gr.processing_utils.convert_to_16_bit_wav(
            reference.astype(float_dtype)
        )
        assert np.allclose(reference, converted)
        assert converted.dtype == "int16"

    # Samples that are already int16 are passed in directly.
    passthrough = gr.processing_utils.convert_to_16_bit_wav(reference)
    assert np.allclose(reference, passthrough)
    assert passthrough.dtype == "int16"
119+
100120

101121
class TestOutputPreprocessing(unittest.TestCase):
102122
def test_decode_base64_to_binary(self):

0 commit comments

Comments
 (0)