Skip to content

Commit 2eaf493

Browse files
committed
[*] refactoring: convert RGB images to YUV before encoding them to H.264
1 parent eabcdb1 commit 2eaf493

File tree

10 files changed

+248
-42
lines changed

10 files changed

+248
-42
lines changed

Cargo.lock

Lines changed: 62 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ console_log = "1.0"
8282
crypto-hash = "0.3"
8383
platform-dirs = "0.3"
8484

85+
yuv = "0.8"
8586
mp4 = "0.14"
8687
x264 = "0.5"
8788
cpal = "0.16"
@@ -97,6 +98,7 @@ thiserror = "2.0"
9798
crossbeam = "0.8"
9899
spin_sleep = "1.3"
99100
nnnoiseless = "0.5"
101+
h264-reader = "0.8"
100102
ffmpeg-sidecar = "2.2"
101103
derive_builder = "0.20"
102104
wayland-client = "0.31"

lib/mp4m/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ rubato.workspace = true
1919
fdk-aac.workspace = true
2020
thiserror.workspace = true
2121
crossbeam.workspace = true
22+
h264-reader.workspace = true
2223
derive_builder.workspace = true
2324

2425
[dev-dependencies]

lib/mp4m/examples/mp4_processor_demo.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
5858
spec: *spec,
5959
})?;
6060

61-
// Start processing in a separate thread
62-
let processor_thread = thread::spawn(move || {
63-
if let Err(e) = processor.run_processing_loop() {
64-
log::warn!("MP4 processing error: {}", e);
65-
}
66-
});
67-
6861
// Process audio samples if available
6962
if let Some(spec) = audio_spec {
7063
log::debug!(
@@ -75,8 +68,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
7568
);
7669

7770
// Use AAC-friendly frame size (1024 samples per channel)
78-
// let aac_frame_size = 1024; // AAC typically uses 1024 samples per frame
79-
let aac_frame_size = 1124 * 3; // AAC typically uses 1024 samples per frame
71+
let aac_frame_size = 1024; // AAC typically uses 1024 samples per frame
72+
// let aac_frame_size = 1124 * 3; // AAC typically uses 1024 samples per frame
8073
let samples_per_frame = aac_frame_size * spec.channels as usize;
8174

8275
log::debug!(
@@ -108,9 +101,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
108101
// Generate and send video frames
109102
let mut h264_encoder = VideoEncoder::new(width, height, fps)?;
110103
let headers_data = h264_encoder.headers()?.entirety().to_vec();
111-
if let Err(e) = video_sender.send(VideoFrameType::Frame(headers_data)) {
112-
panic!("video sender h264 header failed: {e}");
113-
}
104+
105+
// Start processing in a separate thread with headers data
106+
let processor_thread = thread::spawn(move || {
107+
if let Err(e) = processor.run_processing_loop(Some(headers_data)) {
108+
log::warn!("MP4 processing error: {}", e);
109+
}
110+
});
114111

115112
for frame_num in 0..total_frames {
116113
let img = match (frame_num / fps.to_u32()) % 3 {

lib/mp4m/src/mp4_processor.rs

Lines changed: 92 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,29 @@
11
use crossbeam::channel::{Receiver, Sender, bounded};
22
use derive_builder::Builder;
33
use fdk_aac::enc::{BitRate, ChannelMode, Encoder, EncoderParams, Transport};
4+
use h264_reader::{
5+
annexb::AnnexBReader,
6+
nal::{Nal, RefNal, UnitType},
7+
push::NalInterest,
8+
};
49
use hound::WavSpec;
510
use mp4::{
611
AacConfig, AvcConfig, ChannelConfig, Mp4Config, Mp4Sample, Mp4Writer, SampleFreqIndex,
712
TrackConfig, TrackType,
813
};
9-
use std::{fs::File, io::BufWriter, path::PathBuf};
14+
use std::{
15+
fs::File,
16+
io::{BufWriter, Read},
17+
path::PathBuf,
18+
};
1019
use thiserror::Error;
1120

21+
const DEFAULT_PPS: [u8; 6] = [0x68, 0xeb, 0xe3, 0xcb, 0x22, 0xc0];
22+
const DEFAULT_SPS: [u8; 25] = [
23+
0x67, 0x64, 0x00, 0x1e, 0xac, 0xd9, 0x40, 0xa0, 0x2f, 0xf9, 0x70, 0x11, 0x00, 0x00, 0x03, 0x03,
24+
0xe9, 0x00, 0x00, 0xea, 0x60, 0x0f, 0x16, 0x2d, 0x96,
25+
];
26+
1227
pub enum VideoFrameType {
1328
Frame(Vec<u8>),
1429
End,
@@ -221,18 +236,72 @@ impl Mp4Processor {
221236
.map_err(|e| Mp4ProcessorError::Mp4(e.to_string()))
222237
}
223238

239+
fn extract_sps_pps_from_headers(
240+
&self,
241+
headers_data: &[u8],
242+
) -> Result<(Vec<u8>, Vec<u8>), Mp4ProcessorError> {
243+
let mut sps = None;
244+
let mut pps = None;
245+
246+
let mut reader = AnnexBReader::accumulate(|nal: RefNal<'_>| {
247+
let nal_unit_type = nal.header().unwrap().nal_unit_type();
248+
249+
// Read all data from the NAL unit
250+
let mut reader = nal.reader();
251+
let mut data = Vec::new();
252+
if let Ok(_) = reader.read_to_end(&mut data) {
253+
match nal_unit_type {
254+
UnitType::SeqParameterSet => {
255+
sps = Some(data);
256+
}
257+
UnitType::PicParameterSet => {
258+
pps = Some(data);
259+
}
260+
_ => {}
261+
}
262+
}
263+
264+
NalInterest::Buffer
265+
});
266+
267+
reader.push(headers_data);
268+
reader.reset();
269+
270+
match (sps, pps) {
271+
(Some(sps_data), Some(pps_data)) => {
272+
log::info!(
273+
"Successfully extracted SPS ({} bytes) and PPS ({} bytes) from headers",
274+
sps_data.len(),
275+
pps_data.len()
276+
);
277+
log::debug!(
278+
"SPS first 10 bytes: {:02x?}",
279+
&sps_data[..sps_data.len().min(10)]
280+
);
281+
log::debug!(
282+
"PPS first 10 bytes: {:02x?}",
283+
&pps_data[..pps_data.len().min(10)]
284+
);
285+
Ok((sps_data, pps_data))
286+
}
287+
_ => {
288+
log::warn!("Failed to extract SPS/PPS from headers, using fallback");
289+
Ok((DEFAULT_SPS.to_vec(), DEFAULT_PPS.to_vec()))
290+
}
291+
}
292+
}
293+
224294
fn setup_video_track(
225295
&self,
226296
mp4_writer: &mut Mp4Writer<BufWriter<File>>,
227297
video_config: &VideoConfig,
298+
headers_data: Option<&[u8]>,
228299
) -> Result<(), Mp4ProcessorError> {
229-
// Setup video track with minimal SPS/PPS for H.264
230-
// These are basic parameters that should work for most cases
231-
let sps = vec![
232-
0x67, 0x64, 0x00, 0x1e, 0xac, 0xd9, 0x40, 0xa0, 0x2f, 0xf9, 0x70, 0x11, 0x00, 0x00,
233-
0x03, 0x03, 0xe9, 0x00, 0x00, 0xea, 0x60, 0x0f, 0x16, 0x2d, 0x96,
234-
];
235-
let pps = vec![0x68, 0xeb, 0xe3, 0xcb, 0x22, 0xc0];
300+
let (sps, pps) = if let Some(headers) = headers_data {
301+
self.extract_sps_pps_from_headers(headers)?
302+
} else {
303+
(DEFAULT_SPS.to_vec(), DEFAULT_PPS.to_vec())
304+
};
236305

237306
let video_track_config = TrackConfig {
238307
track_type: TrackType::Video,
@@ -318,9 +387,16 @@ impl Mp4Processor {
318387
Ok(audio_track_ids)
319388
}
320389

321-
pub fn run_processing_loop(&mut self) -> Result<(), Mp4ProcessorError> {
390+
pub fn run_processing_loop(
391+
&mut self,
392+
headers_data: Option<Vec<u8>>,
393+
) -> Result<(), Mp4ProcessorError> {
322394
let mut mp4_writer = self.setup_mp4_writer()?;
323-
self.setup_video_track(&mut mp4_writer, &self.config.video_config)?;
395+
self.setup_video_track(
396+
&mut mp4_writer,
397+
&self.config.video_config,
398+
headers_data.as_deref(),
399+
)?;
324400
let audio_track_ids = self.setup_audio_tracks(&mut mp4_writer)?;
325401

326402
let mut video_timestamp = 0u64;
@@ -398,7 +474,7 @@ impl Mp4Processor {
398474

399475
match self.encode_samples_to_aac(track_index, chunk) {
400476
Ok(aac_data) => {
401-
log::info!("aac_data len: {} bytes", aac_data.len());
477+
// log::info!("aac_data len: {} bytes", aac_data.len());
402478

403479
let samples_per_channel = chunk.len() / channels;
404480

@@ -459,11 +535,11 @@ impl Mp4Processor {
459535
) {
460536
for track_index in 0..self.audio_buffer_cache.len() {
461537
if !self.audio_buffer_cache[track_index].is_empty() {
462-
log::info!(
463-
"Flushing cached audio data for track {}: {} samples",
464-
track_index,
465-
self.audio_buffer_cache[track_index].len()
466-
);
538+
// log::info!(
539+
// "Flushing cached audio data for track {}: {} samples",
540+
// track_index,
541+
// self.audio_buffer_cache[track_index].len()
542+
// );
467543

468544
// Process the remaining cached data
469545
let cached_data = std::mem::take(&mut self.audio_buffer_cache[track_index]);

lib/recorder/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ once_cell.workspace = true
2626
spin_sleep.workspace = true
2727
nnnoiseless.workspace = true
2828
derive_setters.workspace = true
29+
yuv = { workspace = true, features = ["rayon"] }
2930
fast_image_resize = { workspace = true, features = ["rayon"] }
3031

3132
# ffmpeg-sidecar.workspace = true

lib/recorder/examples/recording_10m_demo.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
88

99
log::debug!("Recording for exactly 5 seconds...");
1010

11-
let audio_recorder = AudioRecorder::new(None)?;
11+
let audio_recorder = AudioRecorder::new();
1212
let Some(default_input) = audio_recorder.get_default_input_device()? else {
1313
panic!("No default input device found");
1414
};
@@ -43,7 +43,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
4343

4444
let mut session = RecordingSession::new(config);
4545

46-
let stop_sig = session.stop_sig().clone();
46+
let stop_sig = session.get_stop_sig().clone();
4747

4848
// Start a timer thread that stops recording after 5 seconds
4949
thread::spawn(move || {
@@ -53,10 +53,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
5353
});
5454

5555
session.start()?;
56-
session.wait(None::<Box<dyn FnMut(f32)>>, move |v| {
57-
let v = (v * 100.0) as u32;
58-
log::debug!("combine tracks progress: {v}%");
59-
})?;
56+
session.wait()?;
6057

6158
log::debug!("Recording completed successfully!");
6259

lib/recorder/examples/recording_5s_demo.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
3131
)
3232
// .with_enable_audio_channel_user(true)
3333
// .with_enable_speaker_channel_user(true)
34-
// .with_enable_denoise(true)
34+
.with_enable_denoise(true)
3535
.with_audio_device_name(Some(default_input.name))
36-
// .with_enable_recording_speaker(true)
37-
// .with_convert_mono(true)
36+
.with_enable_recording_speaker(true)
37+
.with_convert_to_mono(true)
3838
.with_resolution(recorder::Resolution::Original((
3939
screen_infos[0].logical_size.width as u32,
4040
screen_infos[0].logical_size.height as u32,

0 commit comments

Comments
 (0)