Skip to content

Commit 2164e71

Browse files
jin-2-kakaoent, hyunjinee, claude
authored
fix: use custom viewport dimensions in streaming frame metadata and image resolution (#1033)
* fix: use custom viewport dimensions in streaming frame metadata

  CDP's Page.screencastFrame metadata returns physical device dimensions instead of the emulated viewport, causing frame messages to report incorrect deviceWidth/deviceHeight when a custom viewport is set. Use the viewport dimensions captured at screencast start instead of the CDP metadata values, since the screencast image is already captured at the configured viewport size.

  Closes #1031

* fix: resize browser content area on viewport change for correct screencast dimensions

  Emulation.setDeviceMetricsOverride only changes the CSS viewport, but screencast captures the actual browser content area. This caused frame images to have incorrect dimensions (e.g., 1000x451 instead of 1000x1000) when a custom viewport was set.

  - Call Browser.setContentsSize after setDeviceMetricsOverride so the content area matches the emulated viewport
  - Restart active screencast when viewport dimensions change so maxWidth/maxHeight parameters are updated
  - Skip redundant screencast restarts when dimensions are unchanged
  - Extend E2E test to verify actual JPEG image dimensions, not just metadata

* fix: pass viewport dimensions to --window-size at launch and log setContentsSize failures

  - Add viewport_size to LaunchOptions so --window-size matches the configured viewport from the start, reducing reliance on the experimental Browser.setContentsSize CDP call at runtime
  - Log Browser.setContentsSize failures instead of silently ignoring them with let _ =

* fix: remove duplicate viewport change detection block (dead code from merge)

* fix: use log::debug! instead of eprintln! for setContentsSize failure

* revert: use eprintln! instead of log crate for setContentsSize failure

  The daemon's stderr pipe is closed after startup, so log crate subscribers cannot output during normal operation. eprintln! is visible during startup and in tests, matching the existing convention.

---------

Co-authored-by: hyunjinee <leehj0110@kakao.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6520e41 commit 2164e71

6 files changed

Lines changed: 203 additions & 8 deletions

File tree

cli/src/native/actions.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1491,7 +1491,13 @@ async fn connect_auto_with_fresh_tab() -> Result<BrowserManager, String> {
14911491
}
14921492

14931493
async fn auto_launch(state: &mut DaemonState) -> Result<(), String> {
1494-
let options = launch_options_from_env();
1494+
let mut options = launch_options_from_env();
1495+
1496+
// Use the stream server's viewport dimensions for --window-size so the
1497+
// content area matches the desired viewport from the start.
1498+
if let Some(ref server) = state.stream_server {
1499+
options.viewport_size = Some(server.viewport().await);
1500+
}
14951501
let engine = env::var("AGENT_BROWSER_ENGINE").ok();
14961502

14971503
// Store proxy credentials for Fetch.authRequired handling
@@ -1636,6 +1642,7 @@ fn launch_options_from_env() -> LaunchOptions {
16361642
.unwrap_or(false),
16371643
color_scheme: env::var("AGENT_BROWSER_COLOR_SCHEME").ok(),
16381644
download_path: env::var("AGENT_BROWSER_DOWNLOAD_PATH").ok(),
1645+
viewport_size: None,
16391646
use_real_keychain: false,
16401647
}
16411648
}
@@ -1744,6 +1751,7 @@ async fn handle_launch(cmd: &Value, state: &mut DaemonState) -> Result<Value, St
17441751
.get("downloadPath")
17451752
.and_then(|v| v.as_str())
17461753
.map(String::from),
1754+
viewport_size: None,
17471755
use_real_keychain: false,
17481756
};
17491757

cli/src/native/browser.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -974,6 +974,39 @@ impl BrowserManager {
974974
Some(session_id),
975975
)
976976
.await?;
977+
978+
// Screencast captures the actual content area, not the emulated CSS
979+
// viewport, so resize the content area to match.
980+
if let Ok(target_id) = self.active_target_id() {
981+
if let Ok(window_info) = self
982+
.client
983+
.send_command(
984+
"Browser.getWindowForTarget",
985+
Some(json!({ "targetId": target_id })),
986+
None,
987+
)
988+
.await
989+
{
990+
if let Some(window_id) = window_info.get("windowId").and_then(|v| v.as_i64()) {
991+
if let Err(e) = self
992+
.client
993+
.send_command(
994+
"Browser.setContentsSize",
995+
Some(json!({
996+
"windowId": window_id,
997+
"width": width,
998+
"height": height,
999+
})),
1000+
None,
1001+
)
1002+
.await
1003+
{
1004+
eprintln!("Browser.setContentsSize failed (experimental CDP): {e}");
1005+
}
1006+
}
1007+
}
1008+
}
1009+
9771010
Ok(())
9781011
}
9791012

cli/src/native/cdp/chrome.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ pub struct LaunchOptions {
103103
pub ignore_https_errors: bool,
104104
pub color_scheme: Option<String>,
105105
pub download_path: Option<String>,
106+
/// Initial viewport dimensions used for `--window-size` so the content
107+
/// area matches the desired viewport from the start.
108+
pub viewport_size: Option<(u32, u32)>,
106109
/// When true, omit `--password-store=basic` and `--use-mock-keychain` so
107110
/// Chrome uses the real system keychain. Set automatically when launching
108111
/// with a copied Chrome profile.
@@ -127,6 +130,7 @@ impl Default for LaunchOptions {
127130
ignore_https_errors: false,
128131
color_scheme: None,
129132
download_path: None,
133+
viewport_size: None,
130134
use_real_keychain: false,
131135
}
132136
}
@@ -222,7 +226,8 @@ fn build_chrome_args(options: &LaunchOptions) -> Result<ChromeArgs, String> {
222226
.any(|a| a.starts_with("--start-maximized") || a.starts_with("--window-size="));
223227

224228
if !has_window_size && options.headless && !has_extensions {
225-
args.push("--window-size=1280,720".to_string());
229+
let (w, h) = options.viewport_size.unwrap_or((1280, 720));
230+
args.push(format!("--window-size={},{}", w, h));
226231
}
227232

228233
args.extend(options.args.iter().cloned());

cli/src/native/e2e_tests.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3893,6 +3893,148 @@ async fn e2e_relaunch_on_options_change() {
38933893
assert_success(&resp);
38943894
}
38953895

3896+
// ---------------------------------------------------------------------------
3897+
// Stream: custom viewport is reflected in screencast frame metadata
3898+
// ---------------------------------------------------------------------------
3899+
3900+
#[tokio::test]
3901+
#[ignore]
3902+
async fn e2e_stream_frame_metadata_respects_custom_viewport() {
3903+
let guard = EnvGuard::new(&["AGENT_BROWSER_SOCKET_DIR", "AGENT_BROWSER_SESSION"]);
3904+
let socket_dir = std::env::temp_dir().join(format!(
3905+
"agent-browser-e2e-stream-viewport-{}-{}",
3906+
std::process::id(),
3907+
std::time::SystemTime::now()
3908+
.duration_since(std::time::UNIX_EPOCH)
3909+
.expect("system clock should be after unix epoch")
3910+
.as_nanos()
3911+
));
3912+
std::fs::create_dir_all(&socket_dir).expect("socket dir should be created");
3913+
guard.set(
3914+
"AGENT_BROWSER_SOCKET_DIR",
3915+
socket_dir.to_str().expect("socket dir should be utf-8"),
3916+
);
3917+
guard.set("AGENT_BROWSER_SESSION", "e2e-stream-viewport");
3918+
3919+
let mut state = DaemonState::new();
3920+
3921+
// Enable stream on an ephemeral port
3922+
let resp = execute_command(
3923+
&json!({ "id": "1", "action": "stream_enable", "port": 0 }),
3924+
&mut state,
3925+
)
3926+
.await;
3927+
assert_success(&resp);
3928+
let port = get_data(&resp)["port"]
3929+
.as_u64()
3930+
.expect("stream enable should report the bound port");
3931+
3932+
// Set a custom viewport before launching the browser
3933+
let resp = execute_command(
3934+
&json!({ "id": "2", "action": "viewport", "width": 800, "height": 600 }),
3935+
&mut state,
3936+
)
3937+
.await;
3938+
assert_success(&resp);
3939+
3940+
// Connect a WebSocket client
3941+
let (mut ws, _) = tokio_tungstenite::connect_async(format!("ws://127.0.0.1:{port}"))
3942+
.await
3943+
.expect("websocket client should connect to runtime stream");
3944+
3945+
// Navigate to trigger browser launch and screencast
3946+
let resp = execute_command(
3947+
&json!({ "id": "3", "action": "navigate", "url": "data:text/html,<h1>Viewport Test</h1>" }),
3948+
&mut state,
3949+
)
3950+
.await;
3951+
assert_success(&resp);
3952+
3953+
// Wait for a frame message and verify both metadata and actual image dimensions
3954+
let mut found_frame = false;
3955+
let deadline = tokio::time::Instant::now() + tokio::time::Duration::from_secs(15);
3956+
while tokio::time::Instant::now() < deadline {
3957+
let msg = tokio::time::timeout(tokio::time::Duration::from_secs(3), ws.next()).await;
3958+
let Some(Ok(message)) = msg.ok().flatten() else {
3959+
continue;
3960+
};
3961+
if !message.is_text() {
3962+
continue;
3963+
}
3964+
let parsed: Value =
3965+
serde_json::from_str(message.to_text().expect("text message should be readable"))
3966+
.expect("stream payload should be valid JSON");
3967+
if parsed.get("type") == Some(&json!("frame")) {
3968+
let meta = &parsed["metadata"];
3969+
assert_eq!(
3970+
meta["deviceWidth"], 800,
3971+
"frame metadata deviceWidth should match custom viewport, got: {}",
3972+
meta
3973+
);
3974+
assert_eq!(
3975+
meta["deviceHeight"], 600,
3976+
"frame metadata deviceHeight should match custom viewport, got: {}",
3977+
meta
3978+
);
3979+
3980+
// Verify the actual JPEG image dimensions match the custom viewport.
3981+
let data_str = parsed
3982+
.get("data")
3983+
.and_then(|v| v.as_str())
3984+
.expect("frame message should include base64-encoded 'data' field");
3985+
{
3986+
use base64::Engine;
3987+
let bytes = base64::engine::general_purpose::STANDARD
3988+
.decode(data_str)
3989+
.expect("frame data should be valid base64");
3990+
let (img_w, img_h) = jpeg_dimensions(&bytes)
3991+
.expect("frame data should be a valid JPEG with SOF marker");
3992+
assert_eq!(
3993+
img_w, 800,
3994+
"JPEG image width should match custom viewport, got: {}",
3995+
img_w
3996+
);
3997+
assert_eq!(
3998+
img_h, 600,
3999+
"JPEG image height should match custom viewport, got: {}",
4000+
img_h
4001+
);
4002+
}
4003+
4004+
found_frame = true;
4005+
break;
4006+
}
4007+
}
4008+
assert!(
4009+
found_frame,
4010+
"should have received at least one frame message with correct viewport metadata"
4011+
);
4012+
4013+
// Cleanup
4014+
let resp = execute_command(
4015+
&json!({ "id": "4", "action": "stream_disable" }),
4016+
&mut state,
4017+
)
4018+
.await;
4019+
assert_success(&resp);
4020+
4021+
let resp = execute_command(&json!({ "id": "99", "action": "close" }), &mut state).await;
4022+
assert_success(&resp);
4023+
let _ = std::fs::remove_dir_all(&socket_dir);
4024+
}
4025+
4026+
/// Extract `(width, height)` from the frame header (SOFn segment) of a JPEG.
///
/// The stream is walked segment by segment starting at the SOI marker, using
/// each segment's length field to skip its payload. This avoids mistaking
/// `0xFF 0xC0`-looking bytes inside other segments (for example an EXIF
/// thumbnail in an APP1 payload) for the real frame header.
///
/// Returns `None` when `data` does not start with SOI (`0xFFD8`), when the
/// data is truncated before a frame header, or when scan data / EOI is reached
/// without encountering a frame header.
fn jpeg_dimensions(data: &[u8]) -> Option<(u32, u32)> {
    // Every JPEG stream opens with the SOI marker.
    let mut rest = data.strip_prefix(&[0xFF_u8, 0xD8])?;
    loop {
        let (&lead, after_lead) = rest.split_first()?;
        if lead != 0xFF {
            // Between segments only marker bytes are legal.
            return None;
        }
        let (&marker, after_marker) = after_lead.split_first()?;
        match marker {
            // A marker may be preceded by any number of 0xFF fill bytes.
            0xFF => rest = after_lead,
            // Standalone markers (TEM, RSTn, SOI) carry no length field.
            0x01 | 0xD0..=0xD8 => rest = after_marker,
            // 0xFF00 is a stuffed byte, not a marker; EOI / SOS mean the header
            // area ended without a frame header.
            0x00 | 0xD9 | 0xDA => return None,
            // SOFn frame headers (0xC4 = DHT, 0xC8 = JPG, 0xCC = DAC are not SOF).
            0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF => {
                // Segment layout: length (2), precision (1), height (2), width (2).
                let dims = after_marker.get(3..7)?;
                let height = u16::from_be_bytes([dims[0], dims[1]]);
                let width = u16::from_be_bytes([dims[2], dims[3]]);
                return Some((u32::from(width), u32::from(height)));
            }
            // Any other segment: skip it using its big-endian length, which
            // counts the two length bytes themselves.
            _ => {
                let len_bytes = after_marker.get(..2)?;
                let len = usize::from(u16::from_be_bytes([len_bytes[0], len_bytes[1]]));
                if len < 2 {
                    return None;
                }
                rest = after_marker.get(len..)?;
            }
        }
    }
}
4037+
38964038
// ---------------------------------------------------------------------------
38974039
// Upload: ref-based selector support (issue #1107)
38984040
// ---------------------------------------------------------------------------

cli/src/native/stream/cdp_loop.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,8 @@ pub(super) async fn cdp_event_loop(
156156
"metadata": {
157157
"offsetTop": meta.and_then(|m| m.get("offsetTop")).and_then(|v| v.as_f64()).unwrap_or(0.0),
158158
"pageScaleFactor": meta.and_then(|m| m.get("pageScaleFactor")).and_then(|v| v.as_f64()).unwrap_or(1.0),
159-
"deviceWidth": meta.and_then(|m| m.get("deviceWidth")).and_then(|v| v.as_u64()).unwrap_or(1280),
160-
"deviceHeight": meta.and_then(|m| m.get("deviceHeight")).and_then(|v| v.as_u64()).unwrap_or(720),
159+
"deviceWidth": vw,
160+
"deviceHeight": vh,
161161
"scrollOffsetX": meta.and_then(|m| m.get("scrollOffsetX")).and_then(|v| v.as_f64()).unwrap_or(0.0),
162162
"scrollOffsetY": meta.and_then(|m| m.get("scrollOffsetY")).and_then(|v| v.as_f64()).unwrap_or(0.0),
163163
"timestamp": meta.and_then(|m| m.get("timestamp")).and_then(|v| v.as_u64()).unwrap_or(0),

cli/src/native/stream/mod.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,18 @@ impl StreamServer {
105105
*self.screencasting.lock().await
106106
}
107107

108-
/// Update the stored viewport dimensions used by status messages and screencast.
109-
/// Also notifies the screencast event loop to restart with the new dimensions.
108+
/// Update the stored viewport dimensions and restart the active screencast (if any)
109+
/// so frames are captured at the new size.
110110
pub async fn set_viewport(&self, width: u32, height: u32) {
111-
*self.viewport_width.lock().await = width;
112-
*self.viewport_height.lock().await = height;
111+
let mut vw = self.viewport_width.lock().await;
112+
let mut vh = self.viewport_height.lock().await;
113+
if *vw == width && *vh == height {
114+
return;
115+
}
116+
*vw = width;
117+
*vh = height;
118+
drop(vw);
119+
drop(vh);
113120
self.client_notify.notify_one();
114121
}
115122

0 commit comments

Comments
 (0)