Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 211 additions & 1 deletion .github/actions/spell-check/allow/zoomit.txt

Large diffs are not rendered by default.

7 changes: 0 additions & 7 deletions .github/actions/spell-check/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ azman
azureaiinference
azureinference
azureopenai
Backlight
backticks
Badflags
Badmode
Expand Down Expand Up @@ -472,7 +471,6 @@ DWMWINDOWATTRIBUTE
DWMWINDOWMAXIMIZEDCHANGE
DWORDLONG
dworigin
DWRITE
dxgi
Dxva
eab
Expand Down Expand Up @@ -998,7 +996,6 @@ luid
lusrmgr
LVDS
LWA
LWIN
LZero
MAGTRANSFORM
makeappx
Expand Down Expand Up @@ -1208,7 +1205,6 @@ nonclient
NONCLIENTMETRICSW
NONELEVATED
nonspace
nonstd
NOOWNERZORDER
NOPARENTNOTIFY
NOPREFIX
Expand Down Expand Up @@ -1422,7 +1418,6 @@ Prefixer
Premul
prependpath
prepopulate
Prereq
prevhost
previewer
PREVIEWHANDLERFRAMEINFO
Expand Down Expand Up @@ -2000,7 +1995,6 @@ valuegenerator
VARTYPE
vbcscompiler
vcamp
VCENTER
vcgtq
VCINSTALLDIR
vcp
Expand Down Expand Up @@ -2038,7 +2032,6 @@ vorrq
VOS
vpaddlq
vqsubq
VREDRAW
vreinterpretq
VSC
VSCBD
Expand Down
6 changes: 6 additions & 0 deletions .github/actions/spell-check/patterns.txt
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,9 @@ ms-windows-store://\S+

# ANSI color codes
(?:\\(?:u00|x)1[Bb]|\\03[1-7]|\x1b|\\u\{1[Bb]\})\[\d+(?:;\d+)*m

# Special licenses text from RNNoise (BSD-style disclaimer: ``AS IS'')
``AS IS''

# Old school moniker for macOS from RNNoise
MacOS
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
[Rr]eleases/
x64/
x86/
!**/rnnoise/
!**/rnnoise/x86/
!**/rnnoise/x86/**
ARM64/
bld/
[Bb]in/
Expand Down
157 changes: 104 additions & 53 deletions src/modules/ZoomIt/ZoomIt/AudioSampleGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,17 @@ namespace winrt
using namespace Windows::Devices::Enumeration;
}

AudioSampleGenerator::AudioSampleGenerator(bool captureMicrophone, bool captureSystemAudio, bool micMonoMix)
AudioSampleGenerator::AudioSampleGenerator(bool captureMicrophone, bool captureSystemAudio, bool mixMicrophoneMono, bool useNoiseCancellation)
: m_captureMicrophone(captureMicrophone)
, m_captureSystemAudio(captureSystemAudio)
, m_micMonoMix(micMonoMix)
, m_mixMicrophoneMono(mixMicrophoneMono)
, m_useNoiseCancellation(useNoiseCancellation)
{
OutputDebugStringA(("AudioSampleGenerator created, captureMicrophone=" +
std::string(captureMicrophone ? "true" : "false") +
", captureSystemAudio=" + std::string(captureSystemAudio ? "true" : "false") +
", micMonoMix=" + std::string(micMonoMix ? "true" : "false") + "\n").c_str());
", mixMicrophoneMono=" + std::string(mixMicrophoneMono ? "true" : "false") +
", useNoiseCancellation=" + std::string(useNoiseCancellation ? "true" : "false") + "\n").c_str());
m_audioEvent.create(wil::EventOptions::ManualReset);
m_endEvent.create(wil::EventOptions::ManualReset);
m_startEvent.create(wil::EventOptions::ManualReset);
Expand Down Expand Up @@ -158,8 +160,24 @@ winrt::IAsyncAction AudioSampleGenerator::InitializeAsync()
throw winrt::hresult_error(E_FAIL, L"Failed to initialize loopback audio capture!");
}

// Initialize noise suppressor for microphone audio if enabled
if (m_useNoiseCancellation && m_captureMicrophone)
{
m_noiseSuppressor = std::make_unique<NoiseSuppressor>();
OutputDebugStringA("Noise cancellation enabled for microphone\n");
}

m_audioGraph.QuantumStarted({ this, &AudioSampleGenerator::OnAudioQuantumStarted });

// Start the AudioGraph now so the microphone device begins warming up
// during the remaining recording initialization (transcoder setup, etc.).
// OnAudioQuantumStarted returns early while m_started is false, so audio
// samples are discarded until Start() is called. The side-effect of
// starting the graph early is that the system mic-active icon appears
// sooner, which also triggers a desktop-content change that helps
// unblock the WGC frame pool wait in OnMediaStreamSourceStarting.
m_audioGraph.Start();

m_asyncInitialized.SetEvent();
}
}
Expand Down Expand Up @@ -205,67 +223,65 @@ std::optional<winrt::MediaStreamSample> AudioSampleGenerator::TryGetNextSample()
}
}

// Wait for audio samples to become available, retrying on spurious wakes
// (e.g. when OnAudioQuantumStarted signals m_audioEvent but the quantum
// produced an empty buffer so m_samples is still empty).
for (;;)
{
auto lock = m_lock.lock_exclusive();
if (m_samples.empty() && m_endEvent.is_signaled())
{
return std::nullopt;
}
else if (!m_samples.empty())
{
std::optional result(m_samples.front());
m_samples.pop_front();
return result;
auto lock = m_lock.lock_exclusive();
if (m_samples.empty() && m_endEvent.is_signaled())
{
return std::nullopt;
}
else if (!m_samples.empty())
{
std::optional result(m_samples.front());
m_samples.pop_front();
return result;
}
}
}

m_audioEvent.ResetEvent();
std::vector<HANDLE> events = { m_endEvent.get(), m_audioEvent.get() };
auto waitResult = WaitForMultipleObjectsEx(static_cast<DWORD>(events.size()), events.data(), false, INFINITE, false);
auto eventIndex = -1;
switch (waitResult)
{
case WAIT_OBJECT_0:
case WAIT_OBJECT_0 + 1:
eventIndex = waitResult - WAIT_OBJECT_0;
break;
}
WINRT_VERIFY(eventIndex >= 0);

auto signaledEvent = events[eventIndex];
if (signaledEvent == m_endEvent.get())
{
// End was signaled, but check for any remaining samples before returning nullopt
auto lock = m_lock.lock_exclusive();
if (!m_samples.empty())
m_audioEvent.ResetEvent();
std::vector<HANDLE> events = { m_endEvent.get(), m_audioEvent.get() };
auto waitResult = WaitForMultipleObjectsEx(static_cast<DWORD>(events.size()), events.data(), false, INFINITE, false);
auto eventIndex = -1;
switch (waitResult)
{
std::optional result(m_samples.front());
m_samples.pop_front();
return result;
case WAIT_OBJECT_0:
case WAIT_OBJECT_0 + 1:
eventIndex = waitResult - WAIT_OBJECT_0;
break;
}
return std::nullopt;
}
else
{
auto lock = m_lock.lock_exclusive();
if (m_samples.empty())
WINRT_VERIFY(eventIndex >= 0);

auto signaledEvent = events[eventIndex];
if (signaledEvent == m_endEvent.get())
{
// Spurious wake or race - no samples available
// If end is signaled, return nullopt
return m_endEvent.is_signaled() ? std::nullopt : std::optional<winrt::MediaStreamSample>{};
// End was signaled, but check for any remaining samples before returning nullopt
auto lock = m_lock.lock_exclusive();
if (!m_samples.empty())
{
std::optional result(m_samples.front());
m_samples.pop_front();
return result;
}
return std::nullopt;
}
std::optional result(m_samples.front());
m_samples.pop_front();
return result;
// m_audioEvent was signaled — loop back to check m_samples again.
// If the quantum produced an empty buffer, m_samples will still be
// empty and we'll wait for the next quantum.
}
}

void AudioSampleGenerator::Start()
void AudioSampleGenerator::Start(int64_t videoStartTimestamp)
{
CheckInitialized();
m_videoStartTimestamp = videoStartTimestamp;
auto expected = false;
if (m_started.compare_exchange_strong(expected, true))
{
OutputDebugStringW( L"[AudioGen] Start(): m_started set to true, setting m_startEvent\n" );
m_endEvent.ResetEvent();
m_startEvent.SetEvent();

Expand All @@ -284,7 +300,7 @@ void AudioSampleGenerator::Start()
m_loopbackCapture->Start();
}

m_audioGraph.Start();
// AudioGraph was already started in InitializeAsync for mic warmup.
}
}

Expand Down Expand Up @@ -611,12 +627,21 @@ void AudioSampleGenerator::CombineQueuedSamples()

void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender, winrt::IInspectable const& args)
{
// Don't process if we're not actively recording
// Don't process if we're not actively recording, but DO drain the
// output node so stale audio doesn't accumulate during mic warmup.
// Without this, the first GetFrame() after m_started becomes true
// would return several seconds of buffered audio, confusing the
// transcoder's A/V interleaving.
if (!m_started.load())
{
auto frame = m_audioOutputNode.GetFrame();
(void)frame; // discard
return;
}

static int s_quantumCount = 0;
s_quantumCount++;

{
auto lock = m_lock.lock_exclusive();

Expand All @@ -628,6 +653,14 @@ void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender
auto sampleBuffer = winrt::Buffer::CreateCopyFromMemoryBuffer(audioBuffer);
sampleBuffer.Length(audioBuffer.Length());

if( s_quantumCount <= 5 )
{
wchar_t dbg[256];
swprintf_s( dbg, L"[AudioGen] quantum #%d: audioBuffer.Length=%u sampleBuffer.Length=%u started=%d\n",
s_quantumCount, audioBuffer.Length(), sampleBuffer.Length(), m_started.load() ? 1 : 0 );
OutputDebugStringW( dbg );
}

// Calculate expected samples per quantum (~10ms at graph sample rate)
// AudioGraph uses 10ms quantums by default
uint32_t expectedSamplesPerQuantum = (m_graphSampleRate / 100) * m_graphChannels;
Expand All @@ -636,7 +669,7 @@ void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender
// Apply mono mixing to microphone audio if enabled
// This converts stereo mic input (with same signal on both channels) to true mono
// by averaging the channels and writing the result to both channels
if (m_micMonoMix && m_captureMicrophone && numMicSamples > 0 && m_graphChannels >= 2)
if (m_mixMicrophoneMono && m_captureMicrophone && numMicSamples > 0 && m_graphChannels >= 2)
{
float* micData = reinterpret_cast<float*>(sampleBuffer.data());
uint32_t numFrames = numMicSamples / m_graphChannels;
Expand All @@ -656,6 +689,12 @@ void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender
}
}
}
// Apply noise suppression to microphone audio before mixing with loopback
if (m_noiseSuppressor && m_captureMicrophone && numMicSamples > 0)
{
float* micData = reinterpret_cast<float*>(sampleBuffer.data());
m_noiseSuppressor->Process(micData, numMicSamples, m_graphChannels);
}

// Drain loopback samples regardless of whether we have mic audio
if (m_loopbackCapture)
Expand Down Expand Up @@ -733,13 +772,25 @@ void AudioSampleGenerator::OnAudioQuantumStarted(winrt::AudioGraph const& sender

if (sampleBuffer.Length() > 0)
{
auto sample = winrt::MediaStreamSample::CreateFromBuffer(sampleBuffer, timestamp.value());
// Rebase audio timestamps to the video's SystemRelativeTime domain.
// AudioGraph RelativeTime starts near 0 (or a few hundred ms after
// warmup draining), while video uses absolute SRT (~hours since boot).
// Without rebasing, the transcoder sees audio far behind video and
// starves video while trying to fill the gap with audio.
if (!m_hasTimestampOffset && timestamp.has_value())
{
m_timestampOffset = m_videoStartTimestamp - timestamp.value().count();
m_hasTimestampOffset = true;
}
auto adjustedTs = winrt::TimeSpan{ timestamp.value().count() + m_timestampOffset };

auto sample = winrt::MediaStreamSample::CreateFromBuffer(sampleBuffer, adjustedTs);
m_samples.push_back(sample);

const uint32_t sampleCount = sampleBuffer.Length() / sizeof(float);
const uint32_t frames = (m_graphChannels > 0) ? (sampleCount / m_graphChannels) : 0;
const int64_t durationTicks = (m_graphSampleRate > 0) ? (static_cast<int64_t>(frames) * 10000000LL / m_graphSampleRate) : 0;
m_lastSampleTimestamp = timestamp.value();
m_lastSampleTimestamp = adjustedTs;
m_lastSampleDuration = winrt::TimeSpan{ durationTicks };
m_hasLastSampleTimestamp = true;
}
Expand Down
22 changes: 18 additions & 4 deletions src/modules/ZoomIt/ZoomIt/AudioSampleGenerator.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
#pragma once

#include "LoopbackCapture.h"
#include "NoiseSuppressor.h"

#include <deque>
#include <optional>
#include <memory>

class AudioSampleGenerator
{
public:
AudioSampleGenerator(bool captureMicrophone = true, bool captureSystemAudio = true, bool micMonoMix = false);
AudioSampleGenerator(bool captureMicrophone = true, bool captureSystemAudio = true, bool mixMicrophoneMono = false, bool useNoiseCancellation = false);
~AudioSampleGenerator();

winrt::Windows::Foundation::IAsyncAction InitializeAsync();
winrt::Windows::Media::MediaProperties::AudioEncodingProperties GetEncodingProperties();

std::optional<winrt::Windows::Media::Core::MediaStreamSample> TryGetNextSample();
void Start();
void Start(int64_t videoStartTimestamp = 0);
void Stop();

private:
Expand Down Expand Up @@ -70,5 +75,14 @@ class AudioSampleGenerator
std::atomic<bool> m_started = false;
bool m_captureMicrophone = true;
bool m_captureSystemAudio = true;
bool m_micMonoMix = false;
};
bool m_mixMicrophoneMono = false;
bool m_useNoiseCancellation = false;
std::unique_ptr<NoiseSuppressor> m_noiseSuppressor;

// Timestamp rebasing: audio RelativeTime → video SystemRelativeTime domain.
// Without this, the transcoder sees audio timestamps (~350ms) far below video
// timestamps (~111 billion ticks) and starves video while trying to fill the gap.
int64_t m_videoStartTimestamp = 0;
int64_t m_timestampOffset = 0;
bool m_hasTimestampOffset = false;
};
Loading
Loading