Skip to content

Commit 1845118

Browse files
Fix bounds checks in FACT's string_to_compact method (bytecodealliance#13016)
We need to bounds check the source byte length, not the number of code units. Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
1 parent 32e9240 commit 1845118

2 files changed

Lines changed: 104 additions & 19 deletions

File tree

crates/environ/src/fact/trampoline.rs

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1759,7 +1759,6 @@ impl<'a, 'b> Compiler<'a, 'b> {
17591759
dst_enc: FE,
17601760
) -> WasmString<'c> {
17611761
assert!(dst_enc.width() >= src_enc.width());
1762-
self.validate_string_length(src, dst_enc);
17631762

17641763
let src_mem_opts = {
17651764
match &src.opts.data_model {
@@ -1774,22 +1773,8 @@ impl<'a, 'b> Compiler<'a, 'b> {
17741773
}
17751774
};
17761775

1777-
// Calculate the source byte length given the size of each code
1778-
// unit. Note that this shouldn't overflow given
1779-
// `validate_string_length` above.
1780-
let mut src_byte_len_tmp = None;
1781-
let src_byte_len = if src_enc.width() == 1 {
1782-
src.len.idx
1783-
} else {
1784-
assert_eq!(src_enc.width(), 2);
1785-
self.instruction(LocalGet(src.len.idx));
1786-
self.ptr_uconst(src_mem_opts, 1);
1787-
self.ptr_shl(src_mem_opts);
1788-
let tmp = self.local_set_new_tmp(src.opts.data_model.unwrap_memory().ptr());
1789-
let ret = tmp.idx;
1790-
src_byte_len_tmp = Some(tmp);
1791-
ret
1792-
};
1776+
let (src_byte_len_tmp, src_byte_len) =
1777+
self.source_string_byte_len(src, src_enc, src_mem_opts);
17931778

17941779
// Convert the source code units length to the destination byte
17951780
// length type.
@@ -1851,6 +1836,39 @@ impl<'a, 'b> Compiler<'a, 'b> {
18511836

18521837
dst
18531838
}
1839+
1840+
/// Calculate the source byte length given the size of each code
1841+
/// unit.
1842+
///
1843+
/// Returns an optional temporary local if it was needed, which the caller
1844+
/// needs to deallocate with `free_temp_local`. Additionally returns the
1845+
/// index of the local which contains the byte length of the string, which
1846+
/// may point to the temporary local passed in.
1847+
fn source_string_byte_len(
1848+
&mut self,
1849+
src: &WasmString<'_>,
1850+
src_enc: FE,
1851+
src_mem_opts: &LinearMemoryOptions,
1852+
) -> (Option<TempLocal>, u32) {
1853+
self.validate_string_length(src, src_enc);
1854+
1855+
if src_enc.width() == 1 {
1856+
(None, src.len.idx)
1857+
} else {
1858+
assert_eq!(src_enc.width(), 2);
1859+
1860+
// Note that this shouldn't overflow given `validate_string_length`
1861+
// above.
1862+
self.instruction(LocalGet(src.len.idx));
1863+
self.ptr_uconst(src_mem_opts, 1);
1864+
self.ptr_shl(src_mem_opts);
1865+
let tmp = self.local_set_new_tmp(src.opts.data_model.unwrap_memory().ptr());
1866+
1867+
let idx = tmp.idx;
1868+
(Some(tmp), idx)
1869+
}
1870+
}
1871+
18541872
// Corresponding function for `store_string_to_utf8` in the spec.
18551873
//
18561874
// This translation works by possibly performing a number of
@@ -2252,7 +2270,9 @@ impl<'a, 'b> Compiler<'a, 'b> {
22522270
DataModel::LinearMemory(opts) => opts,
22532271
};
22542272

2255-
self.validate_string_length(src, src_enc);
2273+
let (src_byte_len_tmp, src_byte_len) =
2274+
self.source_string_byte_len(src, src_enc, src_mem_opts);
2275+
22562276
self.convert_src_len_to_dst(src.len.idx, src_mem_opts.ptr(), dst_mem_opts.ptr());
22572277
let dst_len = self.local_tee_new_tmp(dst_mem_opts.ptr());
22582278
let dst_byte_len = self.local_set_new_tmp(dst_mem_opts.ptr());
@@ -2265,7 +2285,7 @@ impl<'a, 'b> Compiler<'a, 'b> {
22652285
}
22662286
};
22672287

2268-
self.validate_string_inbounds(src, src.len.idx);
2288+
self.validate_string_inbounds(src, src_byte_len);
22692289
self.validate_string_inbounds(&dst, dst_byte_len.idx);
22702290

22712291
// Perform the initial latin1 transcode. This returns the number of
@@ -2384,6 +2404,9 @@ impl<'a, 'b> Compiler<'a, 'b> {
23842404

23852405
self.free_temp_local(src_len_tmp);
23862406
self.free_temp_local(dst_byte_len);
2407+
if let Some(tmp) = src_byte_len_tmp {
2408+
self.free_temp_local(tmp);
2409+
}
23872410

23882411
dst
23892412
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
;;! multi_memory = true
2+
3+
;; Transcode a utf16 string to latin1+utf16, but the original string is
4+
;; out-of-bounds. Should report a first-class error.
5+
(component
6+
(component $dst
7+
(core module $m
8+
(func (export "recv") (param i32 i32))
9+
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
10+
(memory (export "memory") 1)
11+
)
12+
(core instance $i (instantiate $m))
13+
(func (export "recv") (param "a" string)
14+
(canon lift (core func $i "recv") (realloc (func $i "realloc")) (memory $i "memory")
15+
string-encoding=latin1+utf16)
16+
)
17+
)
18+
19+
;; Source component: uses utf16 encoding.
20+
;; Passes a string placed near the END of linear memory to $dst.
21+
(component $src
22+
(import "recv" (func $recv (param "a" string)))
23+
(core module $libc
24+
(memory (export "memory") 1) ;; 1 page = 65536 bytes
25+
(func (export "realloc") (param i32 i32 i32 i32) (result i32) i32.const 0)
26+
)
27+
(core instance $libc (instantiate $libc))
28+
;; canon lower with utf16 — the source side of the mismatch.
29+
(core func $recv_lowered (canon lower (func $recv) string-encoding=utf16 (memory $libc "memory")))
30+
(core module $m
31+
(import "" "" (func $recv (param i32 i32)))
32+
(import "libc" "memory" (memory 0))
33+
(func (export "run")
34+
;; Write 8 UTF-16 code units (16 bytes) of 'A' (U+0041) at the end of memory.
35+
;; Offsets 65520–65535: exactly fills to the last byte of the page.
36+
(i32.store (i32.const 65520) (i32.const 0x00410041))
37+
(i32.store (i32.const 65524) (i32.const 0x00410041))
38+
(i32.store (i32.const 65528) (i32.const 0x00410041))
39+
(i32.store (i32.const 65532) (i32.const 0x00410041))
40+
41+
;; Pass ptr=65520, len=10 CODE UNITS (not bytes).
42+
;; We wrote 8 code units but claim 10 — the extra 2 are past end of memory.
43+
(call $recv (i32.const 65520) (i32.const 10))
44+
)
45+
)
46+
(core instance $i (instantiate $m
47+
(with "" (instance (export "" (func $recv_lowered))))
48+
(with "libc" (instance $libc))
49+
))
50+
(func (export "run")
51+
(canon lift (core func $i "run"))
52+
)
53+
)
54+
55+
;; Wire the components together and run.
56+
(instance $dst_inst (instantiate $dst))
57+
(instance $i (instantiate $src (with "recv" (func $dst_inst "recv"))))
58+
59+
(export "run" (func $i "run"))
60+
)
61+
62+
(assert_trap (invoke "run") "string content out-of-bounds")

0 commit comments

Comments
 (0)