diff --git a/examples/cbor.v b/examples/cbor.v new file mode 100644 index 00000000000000..c28e628aa7cb7d --- /dev/null +++ b/examples/cbor.v @@ -0,0 +1,78 @@ +module main + +import encoding.cbor +import encoding.hex +import time + +struct Address { + street string + city string + zip string @[cbor: 'postal_code'] +} + +struct User { + name string + age u32 + email ?string + tags []string + address Address + signed_up time.Time + internal string @[skip] +} + +fn main() { + user := User{ + name: 'Alice' + age: 30 + email: 'alice@example.com' + tags: ['admin', 'beta'] + address: Address{ + street: '1 Test Lane' + city: 'Paris' + zip: '75000' + } + signed_up: time.parse_iso8601('2025-01-15T10:00:00Z') or { time.now() } + internal: 'will not be encoded' + } + + // 1. Generic typed encode/decode + bytes := cbor.encode[User](user, cbor.EncodeOpts{})! + println('encoded ${bytes.len} bytes: ${hex.encode(bytes)}') + + back := cbor.decode[User](bytes, cbor.DecodeOpts{})! + println('round-trip name=${back.name} age=${back.age} city=${back.address.city}') + + // 2. Canonical encoding for stable hashing/signing + mut m := map[string]int{} + m['z'] = 26 + m['a'] = 1 + m['m'] = 13 + canonical := cbor.encode[map[string]int](m, cbor.EncodeOpts{ + canonical: true + })! + println('canonical map: ${hex.encode(canonical)}') + + // 3. Decode an unknown payload into a Value tree + v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{})! + if name_val := v.get('name') { + if s := name_val.as_string() { + println('peeked name from Value tree: ${s}') + } + } + + // 4. Manual streaming: build a CBOR array of mixed types + mut p := cbor.new_packer(cbor.EncodeOpts{}) + p.pack_array_header(3) + p.pack_uint(42) + p.pack_text('hello') + p.pack_bool(true) + stream_bytes := p.bytes() + println('manual stream: ${hex.encode(stream_bytes)}') + + mut up := cbor.new_unpacker(stream_bytes, cbor.DecodeOpts{}) + n := up.unpack_array_header()! + first := up.unpack_uint()! + second := up.unpack_text()! 
+ third := up.unpack_bool()! + println('unpacked array of ${n}: ${first}, "${second}", ${third}') +} diff --git a/vlib/encoding/cbor/README.md b/vlib/encoding/cbor/README.md new file mode 100644 index 00000000000000..3f6e8e799fbead --- /dev/null +++ b/vlib/encoding/cbor/README.md @@ -0,0 +1,193 @@ +## Description + +`encoding.cbor` is an RFC 8949 Concise Binary Object Representation codec. + +CBOR is a compact, schema-free binary format that supports the same value +model as JSON (numbers, strings, arrays, maps) plus byte strings, tagged +items, IEEE 754 floats at three widths, and a small set of "simple" +values (`true`, `false`, `null`, `undefined`). It is used by COSE/CWT +(IETF security stack), WebAuthn/FIDO2, the Matter smart-home protocol, +and many IoT stacks because messages are typically 30–60 % smaller than +JSON and parse without quoting/escaping. + +Three layers of API are available: + +* `encode[T]` / `decode[T]` — comptime-driven generic API. Works on + primitives, strings, arrays, maps, structs (with `@[cbor: 'name']`, + `@[skip]`, `@[cbor_rename: 'snake_case']`), enums, `time.Time` + (auto-tagged), and any type implementing `Marshaler` / `Unmarshaler`. +* `Packer` / `Unpacker` — manual streaming API. Use when the schema + isn't known at compile time, or when you need full control over tags, + indefinite-length items and simple values. +* `Value` sumtype — dynamic representation for round-tripping unknown + payloads or inspecting tagged data. + +Defaults follow RFC 8949 *preferred serialisation* (§4.2.2): floats +shrink to the shortest IEEE 754 width that preserves their value, and +every length argument uses the shortest encoding. Set +`EncodeOpts.canonical = true` to additionally sort map keys for +hash/signature stability (§4.2.1, deterministic encoding). 
+
+## Usage
+
+### encode[T] / decode[T]
+
+```v
+import encoding.cbor
+import time
+
+struct Person {
+	name string
+	age int
+	email ?string
+	birthday time.Time
+}
+
+fn main() {
+	bob := Person{
+		name: 'Bob'
+		age: 30
+		birthday: time.now()
+	}
+
+	bytes := cbor.encode[Person](bob, cbor.EncodeOpts{})!
+	// bytes is []u8 — wire-ready CBOR
+
+	back := cbor.decode[Person](bytes, cbor.DecodeOpts{})!
+	assert back.name == 'Bob'
+}
+```
+
+Optional fields (`?T`) encode as CBOR `null` when set to `none`. Enums
+encode as their underlying integer.
+
+### Struct attributes
+
+```v ignore
+struct Login {
+	user_name string @[cbor: 'u'] // emit/read key "u"
+	password string @[skip] // never serialise
+	remember bool @[cbor_rename: 'kebab-case'] // becomes "remember"
+}
+```
+
+The `@[cbor_rename: '...']` attribute on a struct (not a field) applies
+a global rename strategy — supported strategies: `snake_case`,
+`camelCase`, `PascalCase`, `kebab-case`, `SCREAMING_SNAKE_CASE`.
+
+### Manual streaming with Packer / Unpacker
+
+Use this when the schema is dynamic or when you need access to CBOR
+features that don't map directly to V types (tags, indefinite-length
+strings, custom simple values). The basic `pack_*` methods shown here
+cannot fail and therefore return nothing; only the `unpack_*` methods
+return a Result and need `!`:
+
+```v
+import encoding.cbor
+
+fn main() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_array_header(3)
+	p.pack_uint(42)
+	p.pack_text('hello')
+	p.pack_bool(true)
+	bytes := p.bytes()
+
+	mut u := cbor.new_unpacker(bytes, cbor.DecodeOpts{})
+	n := u.unpack_array_header()! // 3
+	a := u.unpack_uint()! // 42
+	b := u.unpack_text()! // 'hello'
+	c := u.unpack_bool()! // true
+	_ = n
+	_ = a
+	_ = b
+	_ = c
+}
+```
+
+### Dynamic values with `Value`
+
+When the payload schema is unknown at compile time, decode into
+`cbor.Value` and walk the sumtype:
+
+```v
+import encoding.cbor
+
+fn main() {
+	bytes := cbor.encode[map[string]int]({
+		'a': 1
+		'b': 2
+	}, cbor.EncodeOpts{})!
+
+	v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{})!
+	if val := v.get('a') {
+		if i := val.as_int() {
+			assert i == 1
+		}
+	}
+}
+```
+
+`Value` covers every CBOR type: `IntNum`, `FloatNum`, `Text`, `Bytes`,
+`Array`, `Map`, `Tag`, `Bool`, `Null`, `Undefined`, `Simple`. Re-encoding
+a `Value` round-trips bit-for-bit when the source was already in
+preferred form.
+
+### Custom Marshaler / Unmarshaler
+
+For types that need a custom on-wire representation, implement either
+side of the interface:
+
+```v ignore
+import encoding.cbor
+
+struct Color {
+	r u8
+	g u8
+	b u8
+}
+
+pub fn (c Color) marshal_cbor(mut p cbor.Packer) ! {
+	p.pack_array_header(3)
+	p.pack_uint(c.r)
+	p.pack_uint(c.g)
+	p.pack_uint(c.b)
+}
+
+pub fn (mut c Color) unmarshal_cbor(mut u cbor.Unpacker) ! {
+	n := u.unpack_array_header()!
+	if n != 3 {
+		return error('Color expects 3 elements')
+	}
+	c.r = u8(u.unpack_uint()!)
+	c.g = u8(u.unpack_uint()!)
+	c.b = u8(u.unpack_uint()!)
+}
+```
+
+### Canonical (deterministic) encoding
+
+For hashing or signing, set `canonical: true` so that map keys are
+sorted by length-then-lexicographic order (RFC 8949 §4.2.1):
+
+```v ignore
+import encoding.cbor
+
+bytes := cbor.encode[map[string]int]({'b': 2, 'a': 1},
+	cbor.EncodeOpts{ canonical: true })!
+// keys are emitted in the order "a", "b" regardless of input order
+```
+
+### Tags and `time.Time`
+
+Values of type `time.Time` are serialised with tag 0 (RFC 3339 string)
+on encode and accept either tag 0 or tag 1 (epoch seconds) on decode.
+Custom tags can be emitted/read via `pack_tag` / `unpack_tag` or by
+constructing a `Value` with `cbor.new_tag(number, content)`.
+
+## Conformance
+
+The test suite (`vlib/encoding/cbor/tests/`) covers every vector from
+RFC 8949 Appendix A, plus indefinite-length strings, depth limits,
+malformed-input rejection, UTF-8 validation, canonical ordering, and
+tagged time round-trips.
+ +```bash +v test vlib/encoding/cbor/tests/ +``` diff --git a/vlib/encoding/cbor/cbor.v b/vlib/encoding/cbor/cbor.v new file mode 100644 index 00000000000000..919f785aaa649d --- /dev/null +++ b/vlib/encoding/cbor/cbor.v @@ -0,0 +1,37 @@ +// Package cbor implements RFC 8949 (Concise Binary Object Representation). +// +// Three layers of API are available: +// +// * `encode[T]` / `decode[T]` — comptime-driven generic API. Works on +// primitives, strings, arrays, maps, structs (with `@[cbor: 'name']` +// and `@[skip]` attributes), enums, `time.Time` (auto-tagged), and +// any type implementing `Marshaler` / `Unmarshaler`. +// +// * `Packer` / `Unpacker` — manual streaming API. Use when the schema +// isn't known at compile time, or when you need full control over +// tags, indefinite-length items and simple values. +// +// * `Value` sumtype — dynamic representation for round-tripping +// unknown payloads or inspecting tagged data. +// +// Defaults follow RFC 8949 *preferred serialisation* (§4.2.2): floats +// shrink to the shortest IEEE 754 width that preserves their value, and +// every length argument uses the shortest encoding. Set +// `EncodeOpts.canonical = true` to additionally sort map keys for +// hash/signature stability (§4.2.1, deterministic encoding). +module cbor + +// encode serialises any V value into CBOR bytes. The returned slice +// owns its backing buffer (V's GC tracks it) — no copy, so the returned +// bytes are safe to keep across calls and to pass to other modules. +pub fn encode[T](val T, opts EncodeOpts) ![]u8 { + mut p := new_packer(opts) + p.pack[T](val)! + return p.bytes() +} + +// decode parses CBOR bytes into a value of type T. +pub fn decode[T](data []u8, opts DecodeOpts) !T { + mut u := new_unpacker(data, opts) + return u.unpack[T]()! 
+} diff --git a/vlib/encoding/cbor/decoder.v b/vlib/encoding/cbor/decoder.v new file mode 100644 index 00000000000000..9c74962313b15c --- /dev/null +++ b/vlib/encoding/cbor/decoder.v @@ -0,0 +1,849 @@ +module cbor + +import math + +// DecodeOpts tunes the decoder. Defaults are conservative: UTF-8 is +// validated, depth is capped to fend off stack-blow-up payloads, and +// duplicate map keys are tolerated (callers that need detection turn +// `deny_duplicate_keys` on). +pub struct DecodeOpts { +pub: + max_depth int = 256 + max_stream_bytes int // 0 = unbounded for stream readers + validate_utf8 bool = true + deny_unknown_fields bool // struct decode rejects unmapped keys + deny_duplicate_keys bool // Map decode rejects repeated keys +} + +// Kind classifies the next item without consuming it. Useful to branch +// before committing to a typed read. +pub enum Kind { + unsigned // major type 0 + negative // major type 1 + bytes // major type 2 (definite or indefinite) + text // major type 3 (definite or indefinite) + array_val // major type 4 (definite or indefinite) + map_val // major type 5 (definite or indefinite) + tag_val // major type 6 + bool_val // simple 20/21 + null_val // simple 22 + undefined // simple 23 + simple_val // other simple values + float_val // half/single/double + break_code // 0xff outside a definite header +} + +// Unpacker walks a CBOR byte slice. Operates non-allocating where +// possible; strings and bytes returned by `unpack_text` / `unpack_bytes` +// always own their storage so they outlive the input buffer. +pub struct Unpacker { +pub mut: + data []u8 + pos int + opts DecodeOpts +} + +// new_unpacker constructs an Unpacker over the given byte slice. +pub fn new_unpacker(data []u8, opts DecodeOpts) Unpacker { + cap := if opts.max_depth > 0 { + opts + } else { + DecodeOpts{ + ...opts + max_depth: 256 + } + } + return Unpacker{ + data: data + pos: 0 + opts: cap + } +} + +// remaining returns the number of unread bytes. 
+@[inline]
+pub fn (u &Unpacker) remaining() int {
+	return u.data.len - u.pos
+}
+
+// done reports whether the unpacker has consumed every byte.
+@[inline]
+pub fn (u &Unpacker) done() bool {
+	return u.pos >= u.data.len
+}
+
+// --------------------------------------------------------------------
+// Low-level byte reads
+// --------------------------------------------------------------------
+
+// read_byte returns the byte at the cursor and advances the cursor by
+// one. At end of input it returns the error built by `eof_at` and leaves
+// the cursor untouched.
+@[direct_array_access; inline]
+fn (mut u Unpacker) read_byte() !u8 {
+	if u.pos >= u.data.len {
+		return eof_at(u.pos)
+	}
+	b := u.data[u.pos]
+	u.pos++
+	return b
+}
+
+// peek_byte returns the byte at the cursor without advancing, or the
+// `eof_at` error when no input is left.
+@[direct_array_access; inline]
+fn (u &Unpacker) peek_byte() !u8 {
+	if u.pos >= u.data.len {
+		return eof_at(u.pos)
+	}
+	return u.data[u.pos]
+}
+
+// read_be_u16 reads a 2-byte big-endian integer and advances the cursor
+// by 2. When fewer than 2 bytes remain it returns the `eof_needing`
+// error and does not move the cursor.
+@[direct_array_access; inline]
+fn (mut u Unpacker) read_be_u16() !u16 {
+	if u.pos + 2 > u.data.len {
+		return eof_needing(u.pos, 2, u.data.len - u.pos)
+	}
+	v := u16(u.data[u.pos]) << 8 | u16(u.data[u.pos + 1])
+	u.pos += 2
+	return v
+}
+
+// read_be_u32 reads a 4-byte big-endian integer and advances the cursor
+// by 4. When fewer than 4 bytes remain it returns the `eof_needing`
+// error and does not move the cursor.
+@[direct_array_access; inline]
+fn (mut u Unpacker) read_be_u32() !u32 {
+	if u.pos + 4 > u.data.len {
+		return eof_needing(u.pos, 4, u.data.len - u.pos)
+	}
+	v := u32(u.data[u.pos]) << 24 | u32(u.data[u.pos + 1]) << 16 | u32(u.data[u.pos + 2]) << 8 | u32(u.data[
+		u.pos + 3])
+	u.pos += 4
+	return v
+}
+
+// read_be_u64 reads an 8-byte big-endian integer and advances the cursor
+// by 8. When fewer than 8 bytes remain it returns the `eof_needing`
+// error and does not move the cursor.
+@[direct_array_access; inline]
+fn (mut u Unpacker) read_be_u64() !u64 {
+	if u.pos + 8 > u.data.len {
+		return eof_needing(u.pos, 8, u.data.len - u.pos)
+	}
+	v := u64(u.data[u.pos]) << 56 | u64(u.data[u.pos + 1]) << 48 | u64(u.data[u.pos + 2]) << 40 | u64(u.data[
+		u.pos + 3]) << 32 | u64(u.data[u.pos + 4]) << 24 | u64(u.data[u.pos + 5]) << 16 | u64(u.data[
+		u.pos + 6]) << 8 | u64(u.data[u.pos + 7])
+	u.pos += 8
+	return v
+}
+
+// read_arg decodes the argument selected by the additional-info bits
+// (`info`, the low five bits of an initial byte): 0..23 are returned
+// as-is, 24..27 read a following 1/2/4/8-byte big-endian value. Every
+// other value — including 31, the indefinite-length marker — is
+// reported as malformed, so callers that accept indefinite-length items
+// must test `info == 31` themselves before calling.
+fn (mut u Unpacker) read_arg(info u8) !u64 {
+	match info {
+		// 0..23: the argument is the info value itself.
+		0...23 { return u64(info) }
+		// 24..27: the argument follows as 1, 2, 4 or 8 big-endian bytes.
+		24 { return u64(u.read_byte()!) }
+		25 { return u64(u.read_be_u16()!) }
+		26 { return u64(u.read_be_u32()!) }
+		27 { return u.read_be_u64()! }
+		// 28..30 are reserved and 31 has no argument; the initial byte sits at pos - 1.
+		else { return malformed(u.pos - 1, 'reserved additional info ${info}') }
+	}
+}
+
+// --------------------------------------------------------------------
+// Public peek
+// --------------------------------------------------------------------
+
+// peek_kind classifies the next item without consuming any input.
+// Only the initial byte is inspected: the top three bits select the
+// major type, and for major type 7 the low five bits pick the variant.
+pub fn (u &Unpacker) peek_kind() !Kind {
+	if u.pos >= u.data.len {
+		return eof_at(u.pos)
+	}
+	b := u.data[u.pos]
+	major := b >> 5
+	info := b & 0x1f
+	match major {
+		0 {
+			return .unsigned
+		}
+		1 {
+			return .negative
+		}
+		2 {
+			return .bytes
+		}
+		3 {
+			return .text
+		}
+		4 {
+			return .array_val
+		}
+		5 {
+			return .map_val
+		}
+		6 {
+			return .tag_val
+		}
+		else {
+			match info {
+				20, 21 { return .bool_val }
+				22 { return .null_val }
+				23 { return .undefined }
+				25, 26, 27 { return .float_val }
+				31 { return .break_code }
+				else { return .simple_val }
+			}
+		}
+	}
+}
+
+// --------------------------------------------------------------------
+// High-level typed reads
+// --------------------------------------------------------------------
+
+// unpack_uint reads a non-negative integer (major type 0). Errors on
+// negatives, floats, or other major types; on a type mismatch the cursor
+// is rewound so the caller can try another typed read.
+pub fn (mut u Unpacker) unpack_uint() !u64 {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 0 {
+		u.pos = start
+		return type_mismatch(start, 'unsigned', b)
+	}
+	return u.read_arg(b & 0x1f)!
+}
+
+// unpack_int reads any CBOR integer (major type 0 or 1) into i64. Errors
+// when the magnitude exceeds i64 range; use `unpack_int_full` to pull
+// values as u64 with a separate sign flag. On a type mismatch or range
+// error the cursor is rewound to the start of the item.
+pub fn (mut u Unpacker) unpack_int() !i64 {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	// Check the major type before decoding the argument: otherwise a
+	// non-integer item (e.g. an indefinite-length string, info == 31)
+	// would surface as a malformed/EOF error with the cursor advanced,
+	// instead of a clean type mismatch.
+	if major > 1 {
+		u.pos = start
+		return type_mismatch(start, 'integer', b)
+	}
+	arg := u.read_arg(b & 0x1f)!
+	if arg > u64(max_i64) {
+		u.pos = start
+		shown := if major == 0 { arg.str() } else { '-1 - ${arg}' }
+		return int_range(start, 'i64', shown)
+	}
+	if major == 0 {
+		return i64(arg)
+	}
+	// Major type 1: represented integer = -1 - arg.
+	return -1 - i64(arg)
+}
+
+// unpack_int_full returns (negative, magnitude). For unsigned values
+// negative=false and magnitude is the raw u64. For negative values
+// negative=true and magnitude is the encoded argument (the integer
+// itself is `-1 - magnitude`). On a type mismatch the cursor is rewound.
+pub fn (mut u Unpacker) unpack_int_full() !(bool, u64) {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	// Same ordering as unpack_int: reject other major types before
+	// reading an argument that does not belong to an integer.
+	if major > 1 {
+		u.pos = start
+		return type_mismatch(start, 'integer', b)
+	}
+	arg := u.read_arg(b & 0x1f)!
+	return major == 1, arg
+}
+
+// unpack_bool reads a CBOR boolean (simple 20/21). On any other byte it
+// returns a type mismatch and rewinds the cursor, matching the other
+// typed reads.
+pub fn (mut u Unpacker) unpack_bool() !bool {
+	start := u.pos
+	b := u.read_byte()!
+	match b {
+		0xf4 {
+			return false
+		}
+		0xf5 {
+			return true
+		}
+		else {
+			u.pos = start
+			return type_mismatch(start, 'bool', b)
+		}
+	}
+}
+
+// unpack_null consumes a CBOR null (0xf6) or errors with type mismatch,
+// in which case the cursor is rewound and nothing is consumed.
+pub fn (mut u Unpacker) unpack_null() ! {
+	start := u.pos
+	b := u.read_byte()!
+	if b != 0xf6 {
+		u.pos = start
+		return type_mismatch(start, 'null', b)
+	}
+}
+
+// unpack_float reads a CBOR float of any width (half/single/double) and
+// returns it as f64. Any other initial byte is a type mismatch and
+// leaves the cursor at the start of the item.
+pub fn (mut u Unpacker) unpack_float() !f64 {
+	start := u.pos
+	b := u.read_byte()!
+	match b {
+		0xf9 {
+			h := u.read_be_u16()!
+			return half_to_f64(h)
+		}
+		0xfa {
+			bits := u.read_be_u32()!
+			return f64(math.f32_from_bits(bits))
+		}
+		0xfb {
+			bits := u.read_be_u64()!
+			return math.f64_from_bits(bits)
+		}
+		else {
+			u.pos = start
+			return type_mismatch(start, 'float', b)
+		}
+	}
+}
+
+// unpack_simple reads a simple value (0..255). Bool/null/undefined are
+// also simple values; this method returns the raw u8.
+pub fn (mut u Unpacker) unpack_simple() !u8 {
+	start := u.pos
+	b := u.read_byte()!
+	// 0xe0..0xf3 carry simple values 0..19 inline in the low five bits.
+	if b >= 0xe0 && b <= 0xf3 {
+		return b & 0x1f
+	}
+	match b {
+		0xf4 {
+			return 20
+		}
+		0xf5 {
+			return 21
+		}
+		0xf6 {
+			return 22
+		}
+		0xf7 {
+			return 23
+		}
+		0xf8 {
+			// Two-byte form: the value follows in the next byte.
+			v := u.read_byte()!
+			if v < 32 {
+				u.pos = start
+				return malformed(start, 'simple value < 32 must use 1-byte form')
+			}
+			return v
+		}
+		else {
+			u.pos = start
+			return type_mismatch(start, 'simple', b)
+		}
+	}
+}
+
+// bounded_len converts a length argument taken from the wire into an
+// int that is guaranteed to fit inside the unread input. The argument is
+// an untrusted u64: narrowing it with a plain `int(size)` can wrap to a
+// negative or truncated value and defeat the bounds checks that follow.
+// When the length exceeds the unread input the usual `eof_needing` error
+// is returned.
+fn (u &Unpacker) bounded_len(size u64) !int {
+	left := u.data.len - u.pos
+	if left < 0 || size > u64(left) {
+		// Clamp the reported requirement so it stays representable as int.
+		need := if size > u64(max_int) { max_int } else { int(size) }
+		return eof_needing(u.pos, need, left)
+	}
+	return int(size)
+}
+
+// prealloc_cap bounds the capacity reserved for a container whose
+// element count comes from the wire. Every item occupies at least one
+// byte, so reserving more slots than there are unread bytes only lets a
+// short hostile payload force a huge allocation.
+fn (u &Unpacker) prealloc_cap(n u64) int {
+	left := u.data.len - u.pos
+	if left <= 0 {
+		return 0
+	}
+	return if n > u64(left) { left } else { int(n) }
+}
+
+// unpack_text reads a definite or indefinite-length text string. The
+// returned string owns its bytes (it's a clone of the input slice).
+// UTF-8 validation runs unless `DecodeOpts.validate_utf8` is false.
+pub fn (mut u Unpacker) unpack_text() !string {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 3 {
+		u.pos = start
+		return type_mismatch(start, 'text', b)
+	}
+	info := b & 0x1f
+	if info == 31 {
+		return u.read_indef_text()!
+	}
+	size := u.bounded_len(u.read_arg(info)!)!
+	return u.read_text_chunk(size)!
+}
+
+// read_text_chunk consumes `size` bytes at the cursor and returns them
+// as a string, validating UTF-8 when the option is enabled.
+@[direct_array_access]
+fn (mut u Unpacker) read_text_chunk(size int) !string {
+	// Compare against the unread byte count rather than computing
+	// `u.pos + size`, which can wrap; a negative size is rejected too.
+	if size < 0 || size > u.data.len - u.pos {
+		return eof_needing(u.pos, size, u.data.len - u.pos)
+	}
+	bytes_start := u.pos
+	u.pos += size
+	if u.opts.validate_utf8 && !utf8_validate_slice(u.data, bytes_start, size) {
+		return InvalidUtf8Error{
+			pos: bytes_start
+		}
+	}
+	return u.data[bytes_start..u.pos].bytestr()
+}
+
+// read_indef_text concatenates the chunks of an indefinite-length text
+// string up to (and including) the break code. The initial 0x7f byte
+// has already been consumed by the caller.
+fn (mut u Unpacker) read_indef_text() !string {
+	mut acc := strings_builder_new()
+	for {
+		b := u.read_byte()!
+		if b == 0xff {
+			break
+		}
+		major := b >> 5
+		info := b & 0x1f
+		if major != 3 || info == 31 {
+			return malformed(u.pos - 1,
+				'indefinite-length text chunk must be a definite-length text string')
+		}
+		size := u.bounded_len(u.read_arg(info)!)!
+		s := u.read_text_chunk(size)!
+		acc.write_string(s)
+	}
+	return acc.str()
+}
+
+// unpack_bytes reads a definite or indefinite-length byte string. The
+// returned slice is a clone, safe to retain after the unpacker is freed.
+pub fn (mut u Unpacker) unpack_bytes() ![]u8 {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 2 {
+		u.pos = start
+		return type_mismatch(start, 'bytes', b)
+	}
+	info := b & 0x1f
+	if info == 31 {
+		return u.read_indef_bytes()!
+	}
+	size := u.bounded_len(u.read_arg(info)!)!
+	return u.read_bytes_chunk(size)!
+}
+
+// read_bytes_chunk clones `size` bytes at the cursor and advances past
+// them.
+@[direct_array_access]
+fn (mut u Unpacker) read_bytes_chunk(size int) ![]u8 {
+	// Overflow-safe bounds check; see read_text_chunk.
+	if size < 0 || size > u.data.len - u.pos {
+		return eof_needing(u.pos, size, u.data.len - u.pos)
+	}
+	out := u.data[u.pos..u.pos + size].clone()
+	u.pos += size
+	return out
+}
+
+// read_indef_bytes concatenates the chunks of an indefinite-length byte
+// string up to (and including) the break code. The initial 0x5f byte
+// has already been consumed by the caller.
+fn (mut u Unpacker) read_indef_bytes() ![]u8 {
+	mut acc := []u8{cap: 64}
+	for {
+		b := u.read_byte()!
+		if b == 0xff {
+			break
+		}
+		major := b >> 5
+		info := b & 0x1f
+		if major != 2 || info == 31 {
+			return malformed(u.pos - 1,
+				'indefinite-length bytes chunk must be a definite-length byte string')
+		}
+		size := u.bounded_len(u.read_arg(info)!)!
+		acc << u.read_bytes_chunk(size)!
+	}
+	return acc
+}
+
+// unpack_array_header reads the prefix of an array. Returns the count
+// for definite-length arrays, or -1 for indefinite-length arrays (the
+// caller then loops until peek_kind() == .break_code and consumes the
+// break with `expect_break`).
+pub fn (mut u Unpacker) unpack_array_header() !i64 {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 4 {
+		u.pos = start
+		return type_mismatch(start, 'array', b)
+	}
+	info := b & 0x1f
+	if info == 31 {
+		return -1
+	}
+	arg := u.read_arg(info)!
+	if arg > u64(max_i64) {
+		u.pos = start
+		return int_range(start, 'i64', arg.str())
+	}
+	return i64(arg)
+}
+
+// unpack_map_header reads the prefix of a map. Returns pair count or -1
+// for indefinite-length maps.
+pub fn (mut u Unpacker) unpack_map_header() !i64 {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 5 {
+		u.pos = start
+		return type_mismatch(start, 'map', b)
+	}
+	info := b & 0x1f
+	if info == 31 {
+		return -1
+	}
+	arg := u.read_arg(info)!
+	if arg > u64(max_i64) {
+		u.pos = start
+		return int_range(start, 'i64', arg.str())
+	}
+	return i64(arg)
+}
+
+// unpack_tag reads a tag header and returns the tag number. The caller
+// must follow up by reading the tag content.
+pub fn (mut u Unpacker) unpack_tag() !u64 {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 6 {
+		u.pos = start
+		return type_mismatch(start, 'tag', b)
+	}
+	return u.read_arg(b & 0x1f)!
+}
+
+// peek_break reports whether the next byte is the break stop code.
+@[inline]
+pub fn (u &Unpacker) peek_break() bool {
+	return u.pos < u.data.len && u.data[u.pos] == 0xff
+}
+
+// expect_break consumes a single 0xff break code; errors otherwise.
+pub fn (mut u Unpacker) expect_break() ! {
+	b := u.read_byte()!
+	if b != 0xff {
+		return malformed(u.pos - 1, 'expected break code, got 0x${b:02x}')
+	}
+}
+
+// --------------------------------------------------------------------
+// Skip
+// --------------------------------------------------------------------
+
+// skip_value advances past one complete CBOR value without allocating.
+// Honours the depth cap so adversarial deeply-nested input cannot blow
+// the stack.
+pub fn (mut u Unpacker) skip_value() ! {
+	u.skip_inner(0)!
+}
+
+// skip_inner skips one item at nesting level `depth`, recursing into
+// arrays, maps and tags with `depth + 1`.
+fn (mut u Unpacker) skip_inner(depth int) ! {
+	if depth > u.opts.max_depth {
+		return MaxDepthError{
+			pos: u.pos
+			max_depth: u.opts.max_depth
+		}
+	}
+	b := u.read_byte()!
+	major := b >> 5
+	info := b & 0x1f
+	match major {
+		0, 1 {
+			u.read_arg(info)!
+		}
+		2, 3 {
+			if info == 31 {
+				// RFC 8949 §3.2.3: each chunk MUST be a definite-length
+				// string of the same major type — no nested indefinite,
+				// no cross-type chunks. Mirror unpack_text/unpack_bytes.
+				for {
+					if u.peek_break() {
+						u.pos++
+						break
+					}
+					cb := u.read_byte()!
+					cmajor := cb >> 5
+					cinfo := cb & 0x1f
+					if cmajor != major || cinfo == 31 {
+						return malformed(u.pos - 1,
+							'indefinite-length string chunk must be a definite-length string of the same major type')
+					}
+					// bounded_len guarantees the chunk lies inside the input.
+					csize := u.bounded_len(u.read_arg(cinfo)!)!
+					u.pos += csize
+				}
+			} else {
+				size := u.bounded_len(u.read_arg(info)!)!
+				u.pos += size
+			}
+		}
+		4 {
+			if info == 31 {
+				for {
+					if u.peek_break() {
+						u.pos++
+						break
+					}
+					u.skip_inner(depth + 1)!
+				}
+			} else {
+				n := u.read_arg(info)!
+				for _ in 0 .. n {
+					u.skip_inner(depth + 1)!
+				}
+			}
+		}
+		5 {
+			if info == 31 {
+				for {
+					if u.peek_break() {
+						u.pos++
+						break
+					}
+					u.skip_inner(depth + 1)! // key
+					u.skip_inner(depth + 1)! // value
+				}
+			} else {
+				n := u.read_arg(info)!
+				for _ in 0 .. n {
+					u.skip_inner(depth + 1)! // key
+					u.skip_inner(depth + 1)! // value
+				}
+			}
+		}
+		6 {
+			u.read_arg(info)!
+			u.skip_inner(depth + 1)!
+		}
+		else {
+			// Major type 7 (floats / simple). Work out how many payload
+			// bytes follow, then advance only once they are known to exist
+			// so the cursor never ends up past the end of the input.
+			mut extra := 0
+			match info {
+				0...23 {} // simple values 0..23 inline
+				24 {
+					extra = 1
+				} // simple value carried in one extra byte
+				25 {
+					extra = 2
+				} // half
+				26 {
+					extra = 4
+				} // single
+				27 {
+					extra = 8
+				} // double
+				31 {
+					return malformed(u.pos - 1, 'unexpected break stop code')
+				}
+				else {
+					return malformed(u.pos - 1, 'reserved additional info ${info}')
+				}
+			}
+
+			if extra > u.data.len - u.pos {
+				return eof_at(u.data.len)
+			}
+			u.pos += extra
+		}
+	}
+}
+
+// --------------------------------------------------------------------
+// Value tree decoder
+// --------------------------------------------------------------------
+
+// unpack_value materialises one CBOR data item as a Value.
+pub fn (mut u Unpacker) unpack_value() !Value {
+	return u.unpack_value_inner(0)!
+}
+
+// unpack_value_inner decodes one item at nesting level `depth` and
+// recurses with `depth + 1` for array elements, map keys/values and tag
+// content; exceeding `opts.max_depth` yields MaxDepthError.
+fn (mut u Unpacker) unpack_value_inner(depth int) !Value {
+	if depth > u.opts.max_depth {
+		return MaxDepthError{
+			pos: u.pos
+			max_depth: u.opts.max_depth
+		}
+	}
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	info := b & 0x1f
+	match major {
+		0 {
+			arg := u.read_arg(info)!
+			return IntNum{
+				negative: false
+				magnitude: arg
+			}
+		}
+		1 {
+			arg := u.read_arg(info)!
+			return IntNum{
+				negative: true
+				magnitude: arg
+			}
+		}
+		2 {
+			// Rewind so unpack_bytes re-reads the initial byte itself.
+			u.pos = start
+			data := u.unpack_bytes()!
+			return Bytes{
+				data: data
+			}
+		}
+		3 {
+			// Rewind so unpack_text re-reads the initial byte itself.
+			u.pos = start
+			s := u.unpack_text()!
+			return Text{
+				value: s
+			}
+		}
+		4 {
+			if info == 31 {
+				mut elements := []Value{cap: 4}
+				for {
+					if u.peek_break() {
+						u.pos++
+						break
+					}
+					elements << u.unpack_value_inner(depth + 1)!
+				}
+				return Array{
+					elements: elements
+				}
+			}
+			n := u.read_arg(info)!
+			// The count is untrusted: cap the reservation by the unread input.
+			mut elements := []Value{cap: u.prealloc_cap(n)}
+			for _ in 0 .. n {
+				elements << u.unpack_value_inner(depth + 1)!
+			}
+			return Array{
+				elements: elements
+			}
+		}
+		5 {
+			if info == 31 {
+				mut pairs := []MapPair{cap: 4}
+				for {
+					if u.peek_break() {
+						u.pos++
+						break
+					}
+					key := u.unpack_value_inner(depth + 1)!
+					val := u.unpack_value_inner(depth + 1)!
+					pairs << MapPair{
+						key: key
+						value: val
+					}
+				}
+				return Map{
+					pairs: pairs
+				}
+			}
+			n := u.read_arg(info)!
+			// The count is untrusted: cap the reservation by the unread input.
+			mut pairs := []MapPair{cap: u.prealloc_cap(n)}
+			for _ in 0 .. n {
+				key := u.unpack_value_inner(depth + 1)!
+				val := u.unpack_value_inner(depth + 1)!
+				pairs << MapPair{
+					key: key
+					value: val
+				}
+			}
+			return Map{
+				pairs: pairs
+			}
+		}
+		6 {
+			number := u.read_arg(info)!
+			content := u.unpack_value_inner(depth + 1)!
+			// Native validation per RFC 8949 §3.4.1: tag 0 wraps an RFC 3339
+			// text string; tag 1 wraps a numeric value (int or float).
+			// QCBOR does the same — accepting wrong content types here would
+			// allow well-formed-but-invalid payloads through.
+			if number == 0 && content !is Text {
+				return malformed(u.pos, 'tag 0 (date/time) must wrap a text string')
+			}
+			if number == 1 && content !is IntNum && content !is FloatNum {
+				return malformed(u.pos, 'tag 1 (epoch) must wrap a number')
+			}
+			return Tag{
+				number: number
+				content_box: [content]
+			}
+		}
+		else {
+			match info {
+				20 {
+					return Bool{
+						value: false
+					}
+				}
+				21 {
+					return Bool{
+						value: true
+					}
+				}
+				22 {
+					return Null{}
+				}
+				23 {
+					return Undefined{}
+				}
+				24 {
+					v := u.read_byte()!
+					if v < 32 {
+						u.pos = start
+						return malformed(start, 'simple value < 32 must use 1-byte form')
+					}
+					return Simple{
+						value: v
+					}
+				}
+				25 {
+					h := u.read_be_u16()!
+					return FloatNum{
+						value: half_to_f64(h)
+						bits: .half
+					}
+				}
+				26 {
+					bits := u.read_be_u32()!
+					return FloatNum{
+						value: f64(math.f32_from_bits(bits))
+						bits: .single
+					}
+				}
+				27 {
+					bits := u.read_be_u64()!
+					return FloatNum{
+						value: math.f64_from_bits(bits)
+						bits: .double
+					}
+				}
+				31 {
+					u.pos = start
+					return malformed(start, 'unexpected break stop code')
+				}
+				else {
+					// info 0..19 are unassigned simple values carried inline.
+					if info <= 19 {
+						return Simple{
+							value: info
+						}
+					}
+					u.pos = start
+					return malformed(start, 'reserved additional info ${info}')
+				}
+			}
+		}
+	}
+}
+
+// strings_builder_new returns an empty StringsBuilder, a minimal local
+// accumulator used for indefinite-length text so that this file does
+// not have to import the `strings` module for that single purpose.
+@[inline] +fn strings_builder_new() StringsBuilder { + return StringsBuilder{ + buf: []u8{cap: 32} + } +} + +struct StringsBuilder { +mut: + buf []u8 +} + +@[inline] +fn (mut b StringsBuilder) write_string(s string) { + if s == '' { + return + } + unsafe { b.buf.push_many(s.str, s.len) } +} + +@[inline] +fn (mut b StringsBuilder) str() string { + return b.buf.bytestr() +} diff --git a/vlib/encoding/cbor/encoder.v b/vlib/encoding/cbor/encoder.v new file mode 100644 index 00000000000000..39446907273d34 --- /dev/null +++ b/vlib/encoding/cbor/encoder.v @@ -0,0 +1,627 @@ +module cbor + +import math + +// EncodeOpts tunes the encoder. Defaults yield RFC 8949 *preferred* +// serialisation: floats shrink to the shortest IEEE 754 width that +// preserves their value, headers use the shortest length encoding. +// +// Setting `canonical = true` additionally sorts map keys per RFC 8949 +// §4.2.1 (deterministic encoding) — useful for hashing/signing. +pub struct EncodeOpts { +pub: + initial_cap int = 64 + canonical bool // sort map keys, definite-length only + self_describe bool // prepend tag 55799 (`d9 d9 f7`) +} + +// Packer accumulates CBOR bytes into an internal buffer. Use `bytes()` +// to retrieve the wire output, or `reset()` to reuse the buffer for the +// next message — that's the cheapest way to emit many small frames. +pub struct Packer { +pub mut: + buf []u8 + opts EncodeOpts +} + +// new_packer builds a Packer with the given options. `opts.initial_cap` +// reserves the buffer up-front; oversize is harmless, undersize triggers +// the usual growth policy. +pub fn new_packer(opts EncodeOpts) Packer { + cap := if opts.initial_cap > 0 { opts.initial_cap } else { 64 } + mut p := Packer{ + buf: []u8{cap: cap} + opts: opts + } + if opts.self_describe { + p.buf << self_describe_prefix + } + return p +} + +// bytes returns the encoded buffer. The returned slice aliases the +// Packer's storage — clone it if you keep using the Packer. 
+@[inline]
+pub fn (mut p Packer) bytes() []u8 {
+	return p.buf
+}
+
+// reset clears the buffer for reuse. The capacity is preserved, so this
+// is the fast path for high-throughput senders. When
+// `opts.self_describe` is set, the self-describe prefix is written again
+// so the next message starts with it too.
+@[inline]
+pub fn (mut p Packer) reset() {
+	unsafe {
+		// Only the length is reset; the backing storage is kept.
+		p.buf.len = 0
+	}
+	if p.opts.self_describe {
+		p.buf << self_describe_prefix
+	}
+}
+
+// reserve makes sure at least `n` more bytes can be appended after the
+// current length without another reallocation; it is a no-op for
+// `n <= 0` or when the spare capacity already suffices. Useful before a
+// string/binary write of known length to skip per-byte growth.
+@[inline]
+pub fn (mut p Packer) reserve(n int) {
+	if n <= 0 {
+		return
+	}
+	needed := p.buf.len + n
+	if needed > p.buf.cap {
+		// Double the capacity (starting from 64) until the request fits.
+		mut new_cap := if p.buf.cap == 0 { 64 } else { p.buf.cap * 2 }
+		for new_cap < needed {
+			new_cap *= 2
+		}
+		// Move the existing contents into a fresh buffer of that capacity.
+		mut grown := []u8{cap: new_cap}
+		grown << p.buf
+		p.buf = grown
+	}
+}
+
+// extend_unchecked grows the buffer's length by `n`. The caller must
+// have already ensured enough capacity via `reserve`. Returns the
+// position at which the new bytes start.
+@[direct_array_access; inline]
+fn (mut p Packer) extend_unchecked(n int) int {
+	pos := p.buf.len
+	unsafe {
+		// No capacity check here: writing `len` directly is what makes the
+		// prior `reserve` call mandatory.
+		p.buf.len = pos + n
+	}
+	return pos
+}
+
+// --------------------------------------------------------------------
+// Low-level head writer
+// --------------------------------------------------------------------
+
+// write_head emits an initial byte (major type | additional info) plus
+// the appropriate big-endian argument. Always uses the shortest encoding
+// (RFC 8949 §4.1, "preferred serialization"). Hot path: avoid the
+// `<<` operator (which carries cap-grow checks per byte) by reserving
+// once, then using direct unsafe index writes.
+@[direct_array_access; inline]
+fn (mut p Packer) write_head(major u8, arg u64) {
+	// `major` is OR-ed into the initial byte unchanged, so callers pass it
+	// already shifted into the top three bits (0x00, 0x20, 0x40, ...).
+	if arg < 24 {
+		// Argument fits in the additional-info bits: one byte total.
+		p.reserve(1)
+		pos := p.extend_unchecked(1)
+		unsafe {
+			p.buf[pos] = major | u8(arg)
+		}
+		return
+	}
+	if arg <= 0xff {
+		// Additional info 24: one argument byte follows.
+		p.reserve(2)
+		pos := p.extend_unchecked(2)
+		unsafe {
+			p.buf[pos] = major | 24
+			p.buf[pos + 1] = u8(arg)
+		}
+		return
+	}
+	if arg <= 0xffff {
+		// Additional info 25: two big-endian argument bytes follow.
+		p.reserve(3)
+		pos := p.extend_unchecked(3)
+		unsafe {
+			p.buf[pos] = major | 25
+			p.buf[pos + 1] = u8(arg >> 8)
+			p.buf[pos + 2] = u8(arg)
+		}
+		return
+	}
+	if arg <= 0xffffffff {
+		// Additional info 26: four big-endian argument bytes follow.
+		p.reserve(5)
+		pos := p.extend_unchecked(5)
+		unsafe {
+			p.buf[pos] = major | 26
+			p.buf[pos + 1] = u8(arg >> 24)
+			p.buf[pos + 2] = u8(arg >> 16)
+			p.buf[pos + 3] = u8(arg >> 8)
+			p.buf[pos + 4] = u8(arg)
+		}
+		return
+	}
+	// Additional info 27: eight big-endian argument bytes follow.
+	p.reserve(9)
+	pos := p.extend_unchecked(9)
+	unsafe {
+		p.buf[pos] = major | 27
+		p.buf[pos + 1] = u8(arg >> 56)
+		p.buf[pos + 2] = u8(arg >> 48)
+		p.buf[pos + 3] = u8(arg >> 40)
+		p.buf[pos + 4] = u8(arg >> 32)
+		p.buf[pos + 5] = u8(arg >> 24)
+		p.buf[pos + 6] = u8(arg >> 16)
+		p.buf[pos + 7] = u8(arg >> 8)
+		p.buf[pos + 8] = u8(arg)
+	}
+}
+
+// write_be_u16 appends `v` as two big-endian bytes.
+@[direct_array_access; inline]
+fn (mut p Packer) write_be_u16(v u16) {
+	p.reserve(2)
+	pos := p.extend_unchecked(2)
+	unsafe {
+		p.buf[pos] = u8(v >> 8)
+		p.buf[pos + 1] = u8(v)
+	}
+}
+
+// write_be_u32 appends `v` as four big-endian bytes.
+@[direct_array_access; inline]
+fn (mut p Packer) write_be_u32(v u32) {
+	p.reserve(4)
+	pos := p.extend_unchecked(4)
+	unsafe {
+		p.buf[pos] = u8(v >> 24)
+		p.buf[pos + 1] = u8(v >> 16)
+		p.buf[pos + 2] = u8(v >> 8)
+		p.buf[pos + 3] = u8(v)
+	}
+}
+
+// write_be_u64 appends `v` as eight big-endian bytes.
+@[direct_array_access; inline]
+fn (mut p Packer) write_be_u64(v u64) {
+	p.reserve(8)
+	pos := p.extend_unchecked(8)
+	unsafe {
+		p.buf[pos] = u8(v >> 56)
+		p.buf[pos + 1] = u8(v >> 48)
+		p.buf[pos + 2] = u8(v >> 40)
+		p.buf[pos + 3] = u8(v >> 32)
+		p.buf[pos + 4] = u8(v >> 24)
+		p.buf[pos + 5] = u8(v >> 16)
+		p.buf[pos + 6] = u8(v >> 8)
+		p.buf[pos + 7] = u8(v)
+	}
+}
+
+// 
--------------------------------------------------------------------
+// High-level packers — primitives
+// --------------------------------------------------------------------
+
+// pack_uint writes `v` as a CBOR unsigned integer (major type 0). Any
+// u64 is accepted, including values larger than i64.max.
+@[inline]
+pub fn (mut p Packer) pack_uint(v u64) {
+	p.write_head(0x00, v)
+}
+
+// pack_int writes a signed integer: major type 0 for non-negative
+// values, major type 1 with argument `-1 - v` for negative ones.
+// Integers below i64.min have to go through `pack_negative_arg`.
+@[inline]
+pub fn (mut p Packer) pack_int(v i64) {
+	if v < 0 {
+		p.write_head(0x20, u64(-1 - v))
+		return
+	}
+	p.write_head(0x00, u64(v))
+}
+
+// pack_negative_arg writes a major type 1 head carrying `arg` verbatim;
+// the integer it stands for is `-1 - arg`. This reaches -2^64, the
+// lowest value a CBOR negative integer can hold.
+@[inline]
+pub fn (mut p Packer) pack_negative_arg(arg u64) {
+	p.write_head(0x20, arg)
+}
+
+// pack_bool writes byte 0xf5 for true or 0xf4 for false (simple values
+// 21 and 20).
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_bool(v bool) {
+	encoded := if v { u8(0xf5) } else { u8(0xf4) }
+	p.reserve(1)
+	idx := p.extend_unchecked(1)
+	unsafe {
+		p.buf[idx] = encoded
+	}
+}
+
+// pack_null writes byte 0xf6, CBOR null (simple value 22).
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_null() {
+	p.reserve(1)
+	idx := p.extend_unchecked(1)
+	unsafe {
+		p.buf[idx] = 0xf6
+	}
+}
+
+// pack_undefined writes byte 0xf7, CBOR undefined (simple value 23).
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_undefined() {
+	p.reserve(1)
+	idx := p.extend_unchecked(1)
+	unsafe {
+		p.buf[idx] = 0xf7
+	}
+}
+
+// pack_simple writes simple value `v`. 0..23 are folded into the
+// initial byte, 32..255 follow a 0xf8 initial byte. 24..31 are not
+// well-formed (RFC 8949 §3.3): an error is returned and nothing is
+// written.
+@[direct_array_access]
+pub fn (mut p Packer) pack_simple(v u8) ! {
+	if v >= 24 && v < 32 {
+		return error('cbor: simple values 24..31 are not well-formed (RFC 8949 §3.3)')
+	}
+	if v >= 32 {
+		p.reserve(2)
+		idx := p.extend_unchecked(2)
+		unsafe {
+			p.buf[idx] = 0xf8
+			p.buf[idx + 1] = v
+		}
+		return
+	}
+	p.reserve(1)
+	idx := p.extend_unchecked(1)
+	unsafe {
+		p.buf[idx] = 0xe0 | v
+	}
+}
+
+// --------------------------------------------------------------------
+// High-level packers — strings and bytes
+// --------------------------------------------------------------------
+
+// pack_text writes `s` as a text string (major type 3). The bytes of
+// `s` are copied as-is. Strings shorter than 24 bytes take a combined
+// path: one reservation covers the 1-byte head and the payload. Longer
+// strings write the head through `write_head` and then append the
+// payload.
+@[direct_array_access]
+pub fn (mut p Packer) pack_text(s string) {
+	n := s.len
+	if n >= 24 {
+		p.write_head(0x60, u64(n))
+		p.reserve(n)
+		unsafe { p.buf.push_many(s.str, n) }
+		return
+	}
+	// Short string: head + payload fit in n + 1 bytes.
+	total := n + 1
+	p.reserve(total)
+	idx := p.extend_unchecked(total)
+	unsafe {
+		p.buf[idx] = u8(0x60) | u8(n)
+		if n > 0 {
+			vmemcpy(&p.buf[idx + 1], s.str, n)
+		}
+	}
+}
+
+// pack_bytes writes `b` as a byte string (major type 2), using the same
+// short/long split as `pack_text`.
+@[direct_array_access]
+pub fn (mut p Packer) pack_bytes(b []u8) {
+	n := b.len
+	if n >= 24 {
+		p.write_head(0x40, u64(n))
+		p.reserve(n)
+		unsafe { p.buf.push_many(b.data, n) }
+		return
+	}
+	total := n + 1
+	p.reserve(total)
+	idx := p.extend_unchecked(total)
+	unsafe {
+		p.buf[idx] = u8(0x40) | u8(n)
+		if n > 0 {
+			vmemcpy(&p.buf[idx + 1], b.data, n)
+		}
+	}
+}
+
+// --------------------------------------------------------------------
+// High-level packers — arrays, maps, tags
+// --------------------------------------------------------------------
+
+// pack_array_header writes the head of a definite-length array holding
+// `n` items; the caller packs the items afterwards.
+@[inline]
+pub fn (mut p Packer) pack_array_header(n u64) {
+	p.write_head(0x80, n)
+}
+
+// pack_map_header writes the prefix for a definite-length map. 
The
+// argument is the number of *pairs*, not items.
+@[inline]
+pub fn (mut p Packer) pack_map_header(n u64) {
+	p.write_head(0xa0, n)
+}
+
+// pack_tag writes a major type 6 head carrying tag `number`. Whatever
+// item is packed next becomes the tag's content.
+@[inline]
+pub fn (mut p Packer) pack_tag(number u64) {
+	p.write_head(0xc0, number)
+}
+
+// put_marker_byte appends one fixed initial byte (an indefinite-length
+// opener or the break code) to the buffer.
+@[direct_array_access; inline]
+fn (mut p Packer) put_marker_byte(b u8) {
+	p.reserve(1)
+	idx := p.extend_unchecked(1)
+	unsafe {
+		p.buf[idx] = b
+	}
+}
+
+// pack_array_indef starts an indefinite-length array (byte 0x9f). End
+// it with `pack_break`.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_array_indef() {
+	p.put_marker_byte(0x9f)
+}
+
+// pack_map_indef starts an indefinite-length map (byte 0xbf). End it
+// with `pack_break`.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_map_indef() {
+	p.put_marker_byte(0xbf)
+}
+
+// pack_text_indef starts an indefinite-length text string (byte 0x7f).
+// Every chunk must itself be a definite-length text string; end with
+// `pack_break`.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_text_indef() {
+	p.put_marker_byte(0x7f)
+}
+
+// pack_bytes_indef starts an indefinite-length byte string (byte 0x5f).
+// Every chunk must itself be a definite-length byte string; end with
+// `pack_break`.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_bytes_indef() {
+	p.put_marker_byte(0x5f)
+}
+
+// pack_break writes the stop code 0xff that closes an indefinite-length
+// item.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_break() {
+	p.put_marker_byte(0xff)
+}
+
+// --------------------------------------------------------------------
+// High-level packers — floats with preferred serialisation
+// --------------------------------------------------------------------
+
+// pack_float64 always emits an 8-byte IEEE 754 float. 
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_float64(v f64) {
+	p.reserve(9)
+	pos := p.extend_unchecked(9)
+	bits := math.f64_bits(v)
+	unsafe {
+		p.buf[pos] = 0xfb
+		p.buf[pos + 1] = u8(bits >> 56)
+		p.buf[pos + 2] = u8(bits >> 48)
+		p.buf[pos + 3] = u8(bits >> 40)
+		p.buf[pos + 4] = u8(bits >> 32)
+		p.buf[pos + 5] = u8(bits >> 24)
+		p.buf[pos + 6] = u8(bits >> 16)
+		p.buf[pos + 7] = u8(bits >> 8)
+		p.buf[pos + 8] = u8(bits)
+	}
+}
+
+// pack_float32 always emits a 4-byte IEEE 754 float.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_float32(v f32) {
+	p.reserve(5)
+	pos := p.extend_unchecked(5)
+	bits := math.f32_bits(v)
+	unsafe {
+		p.buf[pos] = 0xfa
+		p.buf[pos + 1] = u8(bits >> 24)
+		p.buf[pos + 2] = u8(bits >> 16)
+		p.buf[pos + 3] = u8(bits >> 8)
+		p.buf[pos + 4] = u8(bits)
+	}
+}
+
+// pack_float16_bits always emits a 2-byte IEEE 754 float. `bits` is the
+// raw binary16 pattern; it is written unmodified after the 0xf9 byte.
+@[direct_array_access; inline]
+pub fn (mut p Packer) pack_float16_bits(bits u16) {
+	p.reserve(3)
+	pos := p.extend_unchecked(3)
+	unsafe {
+		p.buf[pos] = 0xf9
+		p.buf[pos + 1] = u8(bits >> 8)
+		p.buf[pos + 2] = u8(bits)
+	}
+}
+
+// pack_float emits the shortest IEEE 754 width that preserves the value,
+// per RFC 8949 §4.2.2. NaN serialises as the canonical quiet NaN
+// (0xf97e00), not the original payload.
+@[direct_array_access]
+pub fn (mut p Packer) pack_float(v f64) {
+	if math.is_nan(v) {
+		p.pack_float16_bits(half_qnan_bits)
+		return
+	}
+	if math.is_inf(v, 1) {
+		p.pack_float16_bits(half_pos_inf_bits)
+		return
+	}
+	if math.is_inf(v, -1) {
+		p.pack_float16_bits(half_neg_inf_bits)
+		return
+	}
+	// Try f32: lossless conversion?
+	f32_v := f32(v)
+	if f64(f32_v) == v {
+		bits16, ok := f32_to_half(f32_v)
+		if ok {
+			p.pack_float16_bits(bits16)
+			return
+		}
+		p.pack_float32(f32_v)
+		return
+	}
+	p.pack_float64(v)
+}
+
+// --------------------------------------------------------------------
+// Value tree encoder
+// --------------------------------------------------------------------
+
+// pack_value emits an arbitrary `Value` tree, honouring the original
+// float width hint. Map keys are sorted when `opts.canonical` is set.
+// A `Tag` with an empty content box is emitted with null content.
+pub fn (mut p Packer) pack_value(v Value) {
+	match v {
+		IntNum {
+			if v.negative {
+				p.write_head(0x20, v.magnitude)
+			} else {
+				p.write_head(0x00, v.magnitude)
+			}
+		}
+		Bytes {
+			p.pack_bytes(v.data)
+		}
+		Text {
+			p.pack_text(v.value)
+		}
+		Array {
+			p.pack_array_header(u64(v.elements.len))
+			for el in v.elements {
+				p.pack_value(el)
+			}
+		}
+		Map {
+			p.pack_map_header(u64(v.pairs.len))
+			if p.opts.canonical {
+				p.pack_map_canonical(v.pairs)
+			} else {
+				for pair in v.pairs {
+					p.pack_value(pair.key)
+					p.pack_value(pair.value)
+				}
+			}
+		}
+		Tag {
+			p.pack_tag(v.number)
+			if v.content_box.len > 0 {
+				p.pack_value(v.content_box[0])
+			} else {
+				p.pack_null()
+			}
+		}
+		Bool {
+			p.pack_bool(v.value)
+		}
+		Null {
+			p.pack_null()
+		}
+		Undefined {
+			p.pack_undefined()
+		}
+		FloatNum {
+			match v.bits {
+				.half {
+					// NaN/±Inf bypass the lossless check (NaN != NaN
+					// breaks the f32 round-trip equality test).
+					if math.is_nan(v.value) {
+						p.pack_float16_bits(half_qnan_bits)
+					} else if math.is_inf(v.value, 1) {
+						p.pack_float16_bits(half_pos_inf_bits)
+					} else if math.is_inf(v.value, -1) {
+						p.pack_float16_bits(half_neg_inf_bits)
+					} else {
+						bits16, ok := f64_to_half(v.value)
+						if ok {
+							p.pack_float16_bits(bits16)
+						} else {
+							// Value does not fit binary16 exactly; widen.
+							p.pack_float64(v.value)
+						}
+					}
+				}
+				.single {
+					p.pack_float32(f32(v.value))
+				}
+				.double {
+					p.pack_float64(v.value)
+				}
+				.@none {
+					p.pack_float(v.value)
+				}
+			}
+		}
+		Simple {
+			// NOTE(review): pack_simple rejects values 24..31 and this
+			// function cannot return an error, so such a value is dropped
+			// and nothing is written. Inside an array or map that leaves
+			// the item count out of step with the header.
+			p.pack_simple(v.value) or {}
+		}
+	}
+}
+
+// pack_map_canonical emits the pairs of a map sorted by the bytewise
+// lexicographic order of their encoded keys, as RFC 8949 §4.2.1
+// requires for deterministic encoding. The map header must already
+// have been written by the caller. Each key is encoded exactly once
+// into a scratch packer and the encoded bytes are spliced into the
+// output.
+fn (mut p Packer) pack_map_canonical(pairs []MapPair) {
+	if pairs.len == 0 {
+		return
+	}
+	// Encode each key once, sort indices by the encoded key bytes, then emit.
+	mut encoded_keys := [][]u8{cap: pairs.len}
+	for pair in pairs {
+		mut sub := new_packer(EncodeOpts{ initial_cap: 16, canonical: true })
+		sub.pack_value(pair.key)
+		encoded_keys << sub.bytes().clone()
+	}
+	mut idx := []int{len: pairs.len, init: index}
+	idx.sort_with_compare(fn [encoded_keys] (a &int, b &int) int {
+		return compare_canonical_keys(encoded_keys[*a], encoded_keys[*b])
+	})
+	for i in idx {
+		p.reserve(encoded_keys[i].len)
+		unsafe { p.buf.push_many(encoded_keys[i].data, encoded_keys[i].len) }
+		p.pack_value(pairs[i].value)
+	}
+}
+
+// compare_canonical_keys orders two encoded map keys for RFC 8949
+// §4.2.1 "Core Deterministic Encoding": plain bytewise lexicographic
+// comparison of the encodings. Returns -1, 0 or 1.
+//
+// This is deliberately NOT the length-first rule of RFC 7049 / RFC 8949
+// §4.2.3. The two agree when all keys share one major type, but differ
+// for mixed keys: 100 (0x18 0x64) must sort before -1 (0x20) under
+// §4.2.1 even though its encoding is longer. If one input is a strict
+// prefix of the other, the shorter one sorts first.
+@[direct_array_access]
+fn compare_canonical_keys(a []u8, b []u8) int {
+	shared := if a.len < b.len { a.len } else { b.len }
+	for i in 0 .. shared {
+		if a[i] != b[i] {
+			return if a[i] < b[i] { -1 } else { 1 }
+		}
+	}
+	if a.len == b.len {
+		return 0
+	}
+	return if a.len < b.len { -1 } else { 1 }
+}
+
+// --------------------------------------------------------------------
+// Module-level convenience wrappers
+// --------------------------------------------------------------------
+
+// encode_value encodes a `Value` tree with the given `opts` and returns
+// the result as a fresh byte slice (a copy of the packer's buffer).
+pub fn encode_value(v Value, opts EncodeOpts) []u8 {
+	mut p := new_packer(opts)
+	p.pack_value(v)
+	return p.bytes().clone()
+}
diff --git a/vlib/encoding/cbor/errors.v b/vlib/encoding/cbor/errors.v
new file mode 100644
index 00000000000000..eb9fca9cad5fd9
--- /dev/null
+++ b/vlib/encoding/cbor/errors.v
@@ -0,0 +1,146 @@
+module cbor
+
+// Typed errors for CBOR decode failures. Pattern-match in callers:
+//
+//	cbor.decode[User](bad) or {
+//		if err is cbor.UnexpectedEofError { ... }
+//	}
+
+// UnexpectedEofError fires when the decoder runs past the end of its input.
+pub struct UnexpectedEofError {
+	Error
+pub:
+	pos       int // position at which the read began
+	need      int // bytes the decoder was trying to read
+	remaining int // bytes actually available
+}
+
+// msg formats an UnexpectedEofError for `IError.msg()`.
+pub fn (e &UnexpectedEofError) msg() string {
+	return 'cbor: unexpected EOF at pos ${e.pos}: need ${e.need} bytes, have ${e.remaining}'
+}
+
+// MalformedError fires when the byte stream violates RFC 8949 well-formedness.
+pub struct MalformedError {
+	Error
+pub:
+	pos    int
+	reason string
+}
+
+// msg formats a MalformedError for `IError.msg()`.
+pub fn (e &MalformedError) msg() string {
+	return 'cbor: malformed at pos ${e.pos}: ${e.reason}'
+}
+
+// TypeMismatchError fires when a typed read finds a different major type.
+pub struct TypeMismatchError {
+	Error
+pub:
+	pos      int
+	expected string
+	got      u8 // initial byte
+}
+
+// msg formats a TypeMismatchError for `IError.msg()`.
+pub fn (e &TypeMismatchError) msg() string {
+	got_hex := '${e.got:02x}'
+	return 'cbor: type mismatch at pos ${e.pos}: expected ${e.expected}, got initial byte 0x${got_hex}'
+}
+
+// MaxDepthError is returned when arrays/maps nest deeper than the
+// configured limit.
+pub struct MaxDepthError {
+	Error
+pub:
+	pos       int
+	max_depth int
+}
+
+// msg renders a MaxDepthError as the text returned by `IError.msg()`.
+pub fn (e &MaxDepthError) msg() string {
+	limit := e.max_depth
+	offset := e.pos
+	return 'cbor: max nesting depth ${limit} exceeded at pos ${offset}'
+}
+
+// UnknownFieldError is returned when a struct is decoded with
+// `deny_unknown_fields` and the input has a key no field maps to.
+pub struct UnknownFieldError {
+	Error
+pub:
+	pos  int
+	name string
+}
+
+// msg renders an UnknownFieldError as the text returned by `IError.msg()`.
+pub fn (e &UnknownFieldError) msg() string {
+	field := e.name
+	offset := e.pos
+	return 'cbor: unknown field "${field}" at pos ${offset}'
+}
+
+// IntRangeError is returned when a decoded integer cannot be held by
+// the requested target type.
+pub struct IntRangeError {
+	Error
+pub:
+	pos    int
+	target string
+	value  string
+}
+
+// msg renders an IntRangeError as the text returned by `IError.msg()`.
+pub fn (e &IntRangeError) msg() string {
+	shown := e.value
+	offset := e.pos
+	return 'cbor: integer ${shown} at pos ${offset} out of range for ${e.target}'
+}
+
+// InvalidUtf8Error is returned when text-string bytes are not valid
+// UTF-8 and the decoder has string validation enabled.
+pub struct InvalidUtf8Error {
+	Error
+pub:
+	pos int
+}
+
+// msg formats an InvalidUtf8Error for `IError.msg()`.
+pub fn (e &InvalidUtf8Error) msg() string {
+	return 'cbor: invalid UTF-8 in text string at pos ${e.pos}'
+}
+
+// eof_at builds an UnexpectedEofError for a single missing byte at `pos`.
+@[cold; inline]
+fn eof_at(pos int) IError {
+	return UnexpectedEofError{
+		pos:       pos
+		need:      1
+		remaining: 0
+	}
+}
+
+// eof_needing builds an UnexpectedEofError for a read of `need` bytes at
+// `pos` when only `remaining` bytes are left.
+@[cold; inline]
+fn eof_needing(pos int, need int, remaining int) IError {
+	return UnexpectedEofError{
+		pos:       pos
+		need:      need
+		remaining: remaining
+	}
+}
+
+// malformed builds a MalformedError with the given position and reason.
+@[cold; inline]
+fn malformed(pos int, reason string) IError {
+	return MalformedError{
+		pos:    pos
+		reason: reason
+	}
+}
+
+// type_mismatch builds a TypeMismatchError; `got` is the initial byte
+// that was actually found.
+@[cold; inline]
+fn type_mismatch(pos int, expected string, got u8) IError {
+	return TypeMismatchError{
+		pos:      pos
+		expected: expected
+		got:      got
+	}
+}
+
+// int_range builds an IntRangeError; `value` is the offending integer
+// already rendered as text.
+@[cold; inline]
+fn int_range(pos int, target string, value string) IError {
+	return IntRangeError{
+		pos:    pos
+		target: target
+		value:  value
+	}
+}
diff --git a/vlib/encoding/cbor/generic.v b/vlib/encoding/cbor/generic.v
new file mode 100644
index 00000000000000..ac0bfb3fa50055
--- /dev/null
+++ b/vlib/encoding/cbor/generic.v
@@ -0,0 +1,701 @@
+module cbor
+
+import time
+
+// Generic comptime-driven encoder/decoder. The pack[T] / unpack[T]
+// methods below dispatch on T at compile time, so each call site
+// monomorphises into straight-line code with no runtime type tests.
+//
+// Supported targets:
+//   * bool, all signed/unsigned integer widths, f32, f64
+//   * string (text), []u8 (byte string), enums (encoded as int)
+//   * `$array` (any V array) and `$map` (only `map[string]V` decodable
+//     directly; encoder accepts any `$map`)
+//   * `$struct` (encoded as a string-keyed map; honours
+//     `@[cbor: 'alt']`, `@[skip]`, `@[cbor: '-']`, optional fields)
+//   * `time.Time` — encoded as tag 1 (epoch seconds, integer) on encode;
+//     accepts tag 0 (RFC 3339 text) or tag 1 on decode.
+//   * `RawMessage`, `Value`, `Marshaler`/`Unmarshaler` implementers.
+
+// pack encodes `val` into the packer's buffer using compile-time dispatch.
+// Option struct fields holding `none` are written as CBOR null. With
+// `opts.canonical`, map and struct entries are encoded into scratch
+// buffers and emitted in `compare_canonical_keys` order. A type that
+// matches no branch is written as null.
+@[inline]
+pub fn (mut p Packer) pack[T](val T) ! {
+	$if T is RawMessage {
+		p.pack_raw(val)
+	} $else $if T is Marshaler {
+		bytes := val.to_cbor()
+		if bytes.len > 0 {
+			p.reserve(bytes.len)
+			unsafe { p.buf.push_many(bytes.data, bytes.len) }
+		}
+	} $else $if T is Value {
+		p.pack_value(val)
+	} $else $if T is time.Time {
+		p.pack_tag(tag_epoch)
+		p.pack_int(val.unix())
+	} $else $if T is string {
+		p.pack_text(val)
+	} $else $if T is bool {
+		p.pack_bool(val)
+	} $else $if T is i8 {
+		p.pack_int(i64(val))
+	} $else $if T is i16 {
+		p.pack_int(i64(val))
+	} $else $if T is int {
+		p.pack_int(i64(val))
+	} $else $if T is i32 {
+		p.pack_int(i64(val))
+	} $else $if T is i64 {
+		p.pack_int(val)
+	} $else $if T is u8 {
+		p.pack_uint(u64(val))
+	} $else $if T is u16 {
+		p.pack_uint(u64(val))
+	} $else $if T is u32 {
+		p.pack_uint(u64(val))
+	} $else $if T is u64 {
+		p.pack_uint(val)
+	} $else $if T is f32 {
+		p.pack_float(f64(val))
+	} $else $if T is f64 {
+		p.pack_float(val)
+	} $else $if T is $enum {
+		p.pack_int(i64(val))
+	} $else $if T is []u8 {
+		p.pack_bytes(val)
+	} $else $if T is $array {
+		p.pack_array_header(u64(val.len))
+		for item in val {
+			p.pack(item)!
+		}
+	} $else $if T is $map {
+		p.pack_map_header(u64(val.len))
+		if p.opts.canonical && val.len > 1 {
+			mut encoded_keys := [][]u8{cap: val.len}
+			mut encoded_vals := [][]u8{cap: val.len}
+			for k, item in val {
+				mut ksub := new_packer(EncodeOpts{ initial_cap: 16, canonical: true })
+				ksub.pack(k)!
+				encoded_keys << ksub.bytes().clone()
+				mut vsub := new_packer(EncodeOpts{ initial_cap: 16, canonical: true })
+				vsub.pack(item)!
+				encoded_vals << vsub.bytes().clone()
+			}
+			mut idx := []int{len: val.len, init: index}
+			idx.sort_with_compare(fn [encoded_keys] (a &int, b &int) int {
+				return compare_canonical_keys(encoded_keys[*a], encoded_keys[*b])
+			})
+			for i in idx {
+				p.reserve(encoded_keys[i].len + encoded_vals[i].len)
+				unsafe {
+					p.buf.push_many(encoded_keys[i].data, encoded_keys[i].len)
+					p.buf.push_many(encoded_vals[i].data, encoded_vals[i].len)
+				}
+			}
+		} else {
+			for k, item in val {
+				p.pack(k)!
+				p.pack(item)!
+			}
+		}
+	} $else $if T is $struct {
+		mut strategy := ''
+		$for attr in T.attributes {
+			if attr.name == 'cbor_rename_all' {
+				strategy = attr.arg
+			}
+		}
+		mut field_count := 0
+		$for field in T.fields {
+			if !cbor_field_skipped(field) {
+				field_count++
+			}
+		}
+		p.pack_map_header(u64(field_count))
+		if p.opts.canonical && field_count > 1 {
+			// RFC 8949 §4.2.1: deterministic encoding requires keys to
+			// be ordered by their encoded byte form, not by struct
+			// declaration. Encode each (key, value) pair to a sub-buffer,
+			// sort, then splice — same shape as the $map branch above.
+			mut encoded_keys := [][]u8{cap: field_count}
+			mut encoded_vals := [][]u8{cap: field_count}
+			$for field in T.fields {
+				if !cbor_field_skipped(field) {
+					mut key := cbor_field_key(field)
+					if strategy != '' && !cbor_field_renamed(field) {
+						key = cbor_rename(field.name, strategy)
+					}
+					mut ksub := new_packer(EncodeOpts{ initial_cap: 16, canonical: true })
+					ksub.pack_text(key)
+					encoded_keys << ksub.bytes().clone()
+					mut vsub := new_packer(EncodeOpts{ initial_cap: 16, canonical: true })
+					$if field.typ is $option {
+						if val.$(field.name) == none {
+							vsub.pack_null()
+						} else {
+							vsub.pack(get_value_from_optional(val.$(field.name)))!
+						}
+					} $else {
+						vsub.pack(val.$(field.name))!
+					}
+					encoded_vals << vsub.bytes().clone()
+				}
+			}
+			mut idx := []int{len: field_count, init: index}
+			idx.sort_with_compare(fn [encoded_keys] (a &int, b &int) int {
+				return compare_canonical_keys(encoded_keys[*a], encoded_keys[*b])
+			})
+			for i in idx {
+				p.reserve(encoded_keys[i].len + encoded_vals[i].len)
+				unsafe {
+					p.buf.push_many(encoded_keys[i].data, encoded_keys[i].len)
+					p.buf.push_many(encoded_vals[i].data, encoded_vals[i].len)
+				}
+			}
+		} else {
+			$for field in T.fields {
+				if !cbor_field_skipped(field) {
+					mut key := cbor_field_key(field)
+					if strategy != '' && !cbor_field_renamed(field) {
+						key = cbor_rename(field.name, strategy)
+					}
+					p.pack_text(key)
+					$if field.typ is $option {
+						if val.$(field.name) == none {
+							p.pack_null()
+						} else {
+							p.pack(get_value_from_optional(val.$(field.name)))!
+						}
+					} $else {
+						p.pack(val.$(field.name))!
+					}
+				}
+			}
+		}
+	} $else {
+		p.pack_null()
+	}
+}
+
+// get_value_from_optional unwraps an Option known to be `Some`.
+// Its signature exists solely so V's generic inferrer can pick up the
+// inner T at the comptime call site.
+fn get_value_from_optional[T](val ?T) T {
+	return val or { T{} }
+}
+
+// unpack reads one CBOR value from the buffer and converts it to T.
+// Integer targets are range-checked and fail with an IntRangeError when
+// the decoded value does not fit; unsupported targets return an error.
+@[inline]
+pub fn (mut u Unpacker) unpack[T]() !T {
+	$if T is RawMessage {
+		return u.unpack_raw()!
+	} $else $if T is Unmarshaler {
+		start := u.pos
+		u.skip_value()!
+		mut v := T{}
+		v.from_cbor(u.data[start..u.pos])!
+		return v
+	} $else $if T is Value {
+		return u.unpack_value()!
+	} $else $if T is time.Time {
+		return u.unpack_time()!
+	} $else $if T is string {
+		return u.unpack_text()!
+	} $else $if T is bool {
+		// Accept null as false-equivalent? No — strict by default.
+		return u.unpack_bool()!
+	} $else $if T is i8 {
+		v := u.unpack_int()!
+		if v < -128 || v > 127 {
+			return int_range(u.pos, 'i8', v.str())
+		}
+		return i8(v)
+	} $else $if T is i16 {
+		v := u.unpack_int()!
+		if v < -32_768 || v > 32_767 {
+			return int_range(u.pos, 'i16', v.str())
+		}
+		return i16(v)
+	} $else $if T is int {
+		v := u.unpack_int()!
+		if v < -2_147_483_648 || v > 2_147_483_647 {
+			return int_range(u.pos, 'int', v.str())
+		}
+		return int(v)
+	} $else $if T is i32 {
+		v := u.unpack_int()!
+		if v < -2_147_483_648 || v > 2_147_483_647 {
+			return int_range(u.pos, 'i32', v.str())
+		}
+		return i32(v)
+	} $else $if T is i64 {
+		return u.unpack_int()!
+	} $else $if T is u8 {
+		v := u.unpack_int()!
+		if v < 0 || v > 255 {
+			return int_range(u.pos, 'u8', v.str())
+		}
+		return u8(v)
+	} $else $if T is u16 {
+		v := u.unpack_int()!
+		if v < 0 || v > 65_535 {
+			return int_range(u.pos, 'u16', v.str())
+		}
+		return u16(v)
+	} $else $if T is u32 {
+		v := u.unpack_int()!
+		if v < 0 || v > 4_294_967_295 {
+			return int_range(u.pos, 'u32', v.str())
+		}
+		return u32(v)
+	} $else $if T is u64 {
+		neg, mag := u.unpack_int_full()!
+		if neg {
+			return int_range(u.pos, 'u64', '-1 - ${mag}')
+		}
+		return mag
+	} $else $if T is f32 {
+		return f32(u.unpack_float()!)
+	} $else $if T is f64 {
+		return u.unpack_float()!
+	} $else $if T is $enum {
+		// The value is narrowed to int before the enum cast, so reject
+		// anything that would be silently truncated, as the `int` branch
+		// does.
+		v := u.unpack_int()!
+		if v < -2_147_483_648 || v > 2_147_483_647 {
+			return int_range(u.pos, 'enum', v.str())
+		}
+		return unsafe { T(int(v)) }
+	} $else $if T is []u8 {
+		return u.unpack_bytes()!
+	} $else $if T is $array {
+		mut out := T{}
+		u.unpack_array_into(mut out)!
+		return out
+	} $else $if T is $map {
+		mut out := T{}
+		read_pairs_into_helper(mut u, mut out)!
+		return out
+	} $else $if T is $struct {
+		mut result := T{}
+		u.unpack_struct_into(mut result)!
+		return result
+	} $else {
+		return error('cbor: unsupported target type')
+	}
+}
+
+// unpack_array_into appends the elements of one CBOR array to `out`.
+// A negative header from `unpack_array_header` marks an
+// indefinite-length array, which is read until the break code.
+fn (mut u Unpacker) unpack_array_into[E](mut out []E) ! {
+	hdr := u.unpack_array_header()!
+	if hdr < 0 {
+		// Indefinite.
+		for {
+			if u.peek_break() {
+				u.pos++
+				break
+			}
+			out << u.unpack[E]()!
+		}
+		return
+	}
+	for _ in 0 .. hdr {
+		out << u.unpack[E]()!
+	}
+}
+
+// read_pairs_into_helper is a standalone (non-method) generic function;
+// V's generic-method dispatch can drop the second type parameter when
+// invoked from a comptime $map branch, while the standalone form
+// monomorphises correctly.
+// It reads one CBOR map into `out`; with `opts.deny_duplicate_keys` a
+// repeated key is reported as malformed, otherwise the last value wins.
+fn read_pairs_into_helper[K, V](mut u Unpacker, mut out map[K]V) ! {
+	hdr := u.unpack_map_header()!
+	if hdr < 0 {
+		for {
+			if u.peek_break() {
+				u.pos++
+				break
+			}
+			key := u.unpack[K]()!
+			val := u.unpack[V]()!
+			if u.opts.deny_duplicate_keys && key in out {
+				return malformed(u.pos, 'duplicate map key')
+			}
+			out[key] = val
+		}
+		return
+	}
+	for _ in 0 .. hdr {
+		key := u.unpack[K]()!
+		val := u.unpack[V]()!
+		if u.opts.deny_duplicate_keys && key in out {
+			return malformed(u.pos, 'duplicate map key')
+		}
+		out[key] = val
+	}
+}
+
+// unpack_struct_into decodes one CBOR map into the fields of `result`.
+// Keys must be definite-length text strings and are matched against
+// each non-skipped field's effective name. Unmatched keys have their
+// value skipped, or raise UnknownFieldError under
+// `opts.deny_unknown_fields`. A null value for an Option field stores
+// `none`.
+fn (mut u Unpacker) unpack_struct_into[T](mut result T) ! {
+	mut strategy := ''
+	$for attr in T.attributes {
+		if attr.name == 'cbor_rename_all' {
+			strategy = attr.arg
+		}
+	}
+	hdr := u.unpack_map_header()!
+	indef := hdr < 0
+	mut remaining := if indef { i64(-1) } else { hdr }
+	for {
+		if indef {
+			if u.peek_break() {
+				u.pos++
+				break
+			}
+		} else {
+			if remaining == 0 {
+				break
+			}
+			remaining--
+		}
+		key_ptr, key_len := u.read_text_view()!
+		mut matched := false
+		$for field in T.fields {
+			if !cbor_field_skipped(field) {
+				mut name := cbor_field_key(field)
+				if strategy != '' && !cbor_field_renamed(field) {
+					name = cbor_rename(field.name, strategy)
+				}
+				if !matched && key_len == name.len
+					&& unsafe { C.memcmp(key_ptr, name.str, key_len) } == 0 {
+					matched = true
+					$if field.typ is $option {
+						if u.pos < u.data.len && u.data[u.pos] == 0xf6 {
+							u.pos++
+							result.$(field.name) = none
+						} else {
+							mut inner := create_value_from_optional(result.$(field.name))
+							u.unpack_into(mut inner)!
+							result.$(field.name) = inner
+						}
+					} $else {
+						u.unpack_into(mut result.$(field.name))!
+					}
+				}
+			}
+		}
+		if !matched {
+			start := u.pos
+			u.skip_value()!
+			if u.opts.deny_unknown_fields {
+				return UnknownFieldError{
+					pos: start
+					// `tos` only wraps the pointer into the input buffer;
+					// clone so the error owns its text and does not depend
+					// on that buffer staying alive.
+					name: unsafe { tos(key_ptr, key_len) }.clone()
+				}
+			}
+		}
+	}
+}
+
+// read_text_view returns a (ptr, len) view into the underlying buffer
+// for one definite-length text string. Avoids allocation when matching
+// struct field names. Errors on indefinite-length text since we'd have
+// to copy chunks anyway.
+@[direct_array_access]
+fn (mut u Unpacker) read_text_view() !(&u8, int) {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 3 {
+		u.pos = start
+		return type_mismatch(start, 'text', b)
+	}
+	info := b & 0x1f
+	if info == 31 {
+		u.pos = start
+		return error('cbor: indefinite-length text not supported as map key (decoder)')
+	}
+	size := u.read_arg(info)!
+	// Bounds-check in u64: the declared length is untrusted, and a value
+	// of 2^31 or more turns negative when narrowed to int, which would
+	// slip past an int-based check and move `pos` backwards.
+	avail := u.data.len - u.pos
+	if u64(size) > u64(avail) {
+		need := if u64(size) > u64(max_int) { max_int } else { int(size) }
+		return eof_needing(u.pos, need, avail)
+	}
+	n := int(size)
+	if u.opts.validate_utf8 {
+		if !u.is_utf8_at(u.pos, n) {
+			return InvalidUtf8Error{
+				pos: u.pos
+			}
+		}
+	}
+	ptr := unsafe { &u8(u.data.data) + u.pos }
+	u.pos += n
+	return ptr, n
+}
+
+// is_utf8_at reports whether the `size` bytes starting at `start` in the
+// input are valid UTF-8. An empty range is valid.
+@[direct_array_access; inline]
+fn (u &Unpacker) is_utf8_at(start int, size int) bool {
+	if size == 0 {
+		return true
+	}
+	return utf8_validate_slice(u.data, start, size)
+}
+
+// utf8_validate_slice runs the standard UTF-8 validator on a slice
+// without making an intermediate copy. Mirrors the FSM used by
+// `vlib/encoding/utf8/utf8_util.v`. The 8-byte SWAR pre-scan turns a
+// pure-ASCII payload (the common case: JSON-shaped keys, identifiers)
+// into one load + one mask + one branch per 8 bytes.
+// The caller must ensure `start + size` does not exceed `data.len`;
+// bounds checks are disabled here.
+@[direct_array_access]
+fn utf8_validate_slice(data []u8, start int, size int) bool {
+	mut i := start
+	end := start + size
+	for i < end {
+		// 8-byte SWAR ASCII fast path: a pure-ASCII run skips the
+		// per-byte FSM entirely. Triggers on every iteration so a single
+		// non-ASCII rune doesn't disable the fast path for the rest.
+		for i + 8 <= end {
+			chunk := unsafe { *(&u64(&data[i])) }
+			if chunk & 0x8080808080808080 != 0 {
+				break
+			}
+			i += 8
+		}
+		if i >= end {
+			break
+		}
+		c := data[i]
+		if c < 0x80 {
+			i++
+			continue
+		}
+		// Sequence length from the lead byte; a continuation byte or
+		// 0xf8+ in lead position is invalid.
+		mut n := 0
+		if c & 0xe0 == 0xc0 {
+			n = 2
+		} else if c & 0xf0 == 0xe0 {
+			n = 3
+		} else if c & 0xf8 == 0xf0 {
+			n = 4
+		} else {
+			return false
+		}
+		if i + n > end {
+			return false
+		}
+		// Reject overlongs / surrogates / out-of-range.
+		match n {
+			2 {
+				if c < 0xc2 {
+					return false
+				}
+			}
+			3 {
+				b := data[i + 1]
+				if c == 0xe0 && b < 0xa0 {
+					return false
+				}
+				if c == 0xed && b > 0x9f {
+					return false
+				}
+			}
+			4 {
+				b := data[i + 1]
+				if c == 0xf0 && b < 0x90 {
+					return false
+				}
+				if c == 0xf4 && b > 0x8f {
+					return false
+				}
+				if c > 0xf4 {
+					return false
+				}
+			}
+			else {}
+		}
+
+		for k in 1 .. n {
+			if data[i + k] & 0xc0 != 0x80 {
+				return false
+			}
+		}
+		i += n
+	}
+	return true
+}
+
+// create_value_from_optional returns a zero value of an Option's inner T.
+// Exists so the comptime call site can infer T from a struct field.
+fn create_value_from_optional[T](_val ?T) T {
+	return T{}
+}
+
+// unpack_into fills the target through a mutable reference. The mut
+// parameter exists so V's generic inferer picks up T from the
+// `u.unpack_into(mut result.$(field.name))!` call site.
+@[inline]
+fn (mut u Unpacker) unpack_into[T](mut out T) ! {
+	_ = out // vet's "unused parameter" check doesn't track write-only mut args
+	out = u.unpack[T]()!
+}
+
+// --------------------------------------------------------------------
+// time.Time decoding
+// --------------------------------------------------------------------
+
+// unpack_time decodes a tagged timestamp: tag 0 carries RFC 3339 text,
+// tag 1 carries epoch seconds as an integer or a float. Any other tag,
+// or a non-tag item, is an error and leaves `pos` at the start of the
+// item.
+fn (mut u Unpacker) unpack_time() !time.Time {
+	start := u.pos
+	b := u.read_byte()!
+	major := b >> 5
+	if major != 6 {
+		u.pos = start
+		return type_mismatch(start, 'time tag', b)
+	}
+	number := u.read_arg(b & 0x1f)!
+	match number {
+		0 {
+			s := u.unpack_text()!
+			return time.parse_iso8601(s) or {
+				return malformed(start, 'invalid RFC 3339 timestamp: ${err}')
+			}
+		}
+		1 {
+			peek := u.peek_byte() or { return error('cbor: missing tag-1 content') }
+			major2 := peek >> 5
+			if major2 == 0 || major2 == 1 {
+				secs := u.unpack_int()!
+				return time.unix(secs)
+			}
+			f := u.unpack_float()!
+			// NaN (`f != f`), ±Inf and magnitudes beyond the i64 range have
+			// no whole-second count; converting them to i64 below would be
+			// undefined, so reject them as malformed input.
+			if f != f || f >= 9.2233720368547758e18 || f <= -9.2233720368547758e18 {
+				u.pos = start
+				return malformed(start, 'tag-1 float is not a representable timestamp')
+			}
+			whole := i64(f)
+			frac := f - f64(whole)
+			ns := i64(frac * 1_000_000_000)
+			return time.unix_nanosecond(whole, int(ns))
+		}
+		else {
+			u.pos = start
+			return malformed(start, 'unexpected tag ${number} for time.Time')
+		}
+	}
+}
+
+// --------------------------------------------------------------------
+// Struct attribute helpers
+// --------------------------------------------------------------------
+
+// cbor_field_skipped reports whether a field is excluded from encoding
+// and decoding: it carries `@[skip]` or `@[cbor: '-']`.
+fn cbor_field_skipped[F](field F) bool {
+	for attr in field.attrs {
+		if attr == 'skip' {
+			return true
+		}
+		if attr.starts_with('cbor:') {
+			if val := parse_cbor_attr(attr) {
+				if val == '-' {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
+// cbor_field_key returns the map key for a field: the value of its
+// first usable `@[cbor: '...']` attribute (not empty, not `-`), or the
+// field's own name.
+fn cbor_field_key[F](field F) string {
+	for attr in field.attrs {
+		if attr.starts_with('cbor:') {
+			if val := parse_cbor_attr(attr) {
+				if val != '-' && val != '' {
+					return val
+				}
+			}
+		}
+	}
+	return field.name
+}
+
+// cbor_field_renamed reports whether a field has an explicit
+// `@[cbor: '...']` key, in which case a struct-level rename strategy
+// is not applied to it.
+fn cbor_field_renamed[F](field F) bool {
+	for attr in field.attrs {
+		if attr.starts_with('cbor:') {
+			if val := parse_cbor_attr(attr) {
+				if val != '-' && val != '' {
+					return true
+				}
+			}
+		}
+	}
+	return false
+}
+
+// cbor_rename applies a `cbor_rename_all` strategy to a field name.
+// Unknown strategies return the name unchanged.
+fn cbor_rename(name string, strategy string) string {
+	match strategy {
+		'snake_case' { return cbor_to_snake(name) }
+		'camelCase' { return cbor_to_camel(name) }
+		'PascalCase' { return cbor_to_pascal(name) }
+		'kebab-case' { return cbor_to_kebab(name) }
+		'SCREAMING_SNAKE_CASE' { return cbor_to_snake(name).to_upper() }
+		else { return name }
+	}
+}
+
+// cbor_to_snake lowercases every ASCII capital and puts `_` before it,
+// except at index 0 (`userName` -> `user_name`).
+fn cbor_to_snake(s string) string {
+	mut out := []u8{cap: s.len + 4}
+	for i, c in s {
+		if c >= `A` && c <= `Z` {
+			if i > 0 {
+				out << `_`
+			}
+			out << u8(c + 32)
+		} else {
+			out << c
+		}
+	}
+	return out.bytestr()
+}
+
+// cbor_to_camel drops underscores and uppercases the ASCII letter that
+// directly follows one; a leading capital is lowercased
+// (`user_name` -> `userName`). The "uppercase next" state applies only
+// to the character right after the underscore, so `foo_Bar` becomes
+// `fooBar`: the pending flag is cleared after any emitted character,
+// not only after a lowercase letter.
+fn cbor_to_camel(s string) string {
+	mut out := []u8{cap: s.len}
+	mut upper_next := false
+	for i, c in s {
+		if c == `_` {
+			upper_next = true
+			continue
+		}
+		if upper_next && c >= `a` && c <= `z` {
+			out << u8(c - 32)
+		} else if i == 0 && c >= `A` && c <= `Z` {
+			out << u8(c + 32)
+		} else {
+			out << c
+		}
+		upper_next = false
+	}
+	return out.bytestr()
+}
+
+// cbor_to_pascal is `cbor_to_camel` with the first ASCII letter
+// uppercased.
+fn cbor_to_pascal(s string) string {
+	camel := cbor_to_camel(s)
+	if camel.len == 0 {
+		return camel
+	}
+	first := camel[0]
+	if first >= `a` && first <= `z` {
+		return u8(first - 32).ascii_str() + camel[1..]
+	}
+	return camel
+}
+
+// cbor_to_kebab lowercases ASCII capitals, putting `-` before each one
+// except at index 0, and turns `_` into `-`.
+fn cbor_to_kebab(s string) string {
+	mut out := []u8{cap: s.len + 4}
+	for i, c in s {
+		if c >= `A` && c <= `Z` {
+			if i > 0 {
+				out << `-`
+			}
+			out << u8(c + 32)
+		} else if c == `_` {
+			out << `-`
+		} else {
+			out << c
+		}
+	}
+	return out.bytestr()
+}
+
+// parse_cbor_attr returns the text after the first `:` of an attribute
+// string, trimmed and with one pair of matching surrounding quotes
+// removed. Returns none when there is no `:`.
+fn parse_cbor_attr(attr string) ?string {
+	idx := attr.index(':') or { return none }
+	mut v := attr[idx + 1..].trim_space()
+	if v.len >= 2 && ((v.starts_with("'") && v.ends_with("'"))
+		|| (v.starts_with('"') && v.ends_with('"'))) {
+		v = v[1..v.len - 1]
+	}
+	return v
+}
diff --git a/vlib/encoding/cbor/half.v b/vlib/encoding/cbor/half.v
new file mode 100644
index 00000000000000..72c6561d838c45
--- /dev/null
+++ b/vlib/encoding/cbor/half.v
@@ -0,0 +1,114 @@
+module cbor
+
+import math
+
+// Half-precision (binary16) <-> f32/f64 conversion, integer-only.
+// CBOR major type 7, additional info 25 carries an IEEE 754 binary16 value.
+// V has no f16 type, so we synthesise the conversion via bit manipulation.
+//
+// Layout reminder (big-endian on the wire):
+//   binary16: 1 sign bit | 5 exponent bits (bias 15)    | 10 mantissa bits
+//   binary32: 1 sign bit | 8 exponent bits (bias 127)   | 23 mantissa bits
+//   binary64: 1 sign bit | 11 exponent bits (bias 1023) | 52 mantissa bits
+
+// IEEE 754 binary16 special-value bit patterns — used by the encoder when
+// emitting NaN / ±Inf, and recognised by the decoder. The CBOR canonical
+// quiet-NaN payload is 0x7e00 (RFC 8949 §3.3 / §4.2.2).
+const half_qnan_bits = u16(0x7e00)
+const half_pos_inf_bits = u16(0x7c00)
+const half_neg_inf_bits = u16(0xfc00)
+
+// half_to_f64 expands a 16-bit IEEE 754 value (as a u16) to an f64. Inf and
+// NaN are preserved as the corresponding f64 representations; subnormals
+// are converted exactly.
+@[inline]
+fn half_to_f64(h u16) f64 {
+	sign := u64(h & 0x8000) << 48
+	exp := int((h >> 10) & 0x1f)
+	mant := u64(h & 0x3ff)
+	if exp == 0 {
+		if mant == 0 {
+			return math.f64_from_bits(sign) // ±0
+		}
+		// Subnormal binary16: value = mant * 2^-24. Renormalize for f64.
+		mut m := mant
+		mut e := 1
+		for m & 0x400 == 0 {
+			m <<= 1
+			e++
+		}
+		m &= 0x3ff
+		// e - 1 shifts put the leading 1 at bit 10, so value = 1.m * 2^(-14 - (e - 1)); rebias by 1023.
+		f64_exp := u64(1023 - 14 - (e - 1)) << 52
+		return math.f64_from_bits(sign | f64_exp | (m << 42))
+	}
+	if exp == 0x1f {
+		// Inf or NaN.
+		f64_exp := u64(0x7ff) << 52
+		return math.f64_from_bits(sign | f64_exp | (mant << 42))
+	}
+	// Normal binary16.
+	f64_exp := u64(exp - 15 + 1023) << 52
+	return math.f64_from_bits(sign | f64_exp | (mant << 42))
+}
+
+// f32_to_half tries to round-trip a binary32 value into binary16. Returns
+// (bits, true) when the conversion is exact (lossless), otherwise the
+// boolean is false. Every NaN, whatever its sign or payload, maps to the
+// canonical quiet NaN 0x7e00 and is reported as exact, since RFC 8949
+// §4.2.2 (preferred serialisation) authorises that mapping.
+@[inline]
+fn f32_to_half(v f32) (u16, bool) {
+	bits := math.f32_bits(v)
+	sign := u16((bits >> 16) & 0x8000)
+	exp32 := int((bits >> 23) & 0xff)
+	mant32 := bits & 0x7fffff
+	// Zero.
+	if exp32 == 0 && mant32 == 0 {
+		return sign, true
+	}
+	// Inf or NaN (all-ones exponent).
+	if exp32 == 0xff {
+		if mant32 == 0 {
+			return sign | 0x7c00, true
+		}
+		// NaN: collapse to the canonical quiet NaN, dropping sign and payload.
+		return half_qnan_bits, true
+	}
+	// Real value with unbiased exponent.
+	exp_real := exp32 - 127
+	if exp_real > 15 {
+		return 0, false // would overflow to ±inf, not lossless
+	}
+	if exp_real >= -14 {
+		// Normal range in binary16: low 13 bits of f32 mantissa must be zero.
+		if mant32 & 0x1fff != 0 {
+			return 0, false
+		}
+		half_exp := u16(u32(exp_real + 15) << 10)
+		return sign | half_exp | u16(mant32 >> 13), true
+	}
+	if exp_real >= -24 {
+		// Subnormal in binary16. Build the implicit-leading-1 mantissa and
+		// check the dropped low bits are all zero.
+		shift := u32(-exp_real - 1)
+		full := u32(mant32 | (u32(1) << 23))
+		mask := (u32(1) << shift) - 1
+		if full & mask != 0 {
+			return 0, false
+		}
+		return sign | u16(full >> shift), true
+	}
+	return 0, false
+}
+
+// f64_to_half returns half bits for an f64 only when both f64 -> f32 and
+// f32 -> f16 are lossless. NaN fails the equality check and is rejected.
+@[inline]
+fn f64_to_half(v f64) (u16, bool) {
+	f := f32(v)
+	if f64(f) != v {
+		return 0, false
+	}
+	return f32_to_half(f)
+}
diff --git a/vlib/encoding/cbor/marshaler.v b/vlib/encoding/cbor/marshaler.v
new file mode 100644
index 00000000000000..8b7bebcd25a8c8
--- /dev/null
+++ b/vlib/encoding/cbor/marshaler.v
@@ -0,0 +1,31 @@
+module cbor
+
+// Marshaler lets a user type control its own CBOR encoding. Returned
+// bytes must be exactly one well-formed CBOR data item — the generic
+// encoder copies them verbatim, so malformed output corrupts the
+// surrounding stream.
+//
+// Example:
+//
+//   struct Ipv4 { mut: octets [4]u8 }
+//
+//   pub fn (ip Ipv4) to_cbor() []u8 {
+//       mut p := cbor.new_packer(cbor.EncodeOpts{})
+//       p.pack_bytes(ip.octets[..])
+//       return p.bytes().clone()
+//   }
+pub interface Marshaler {
+	to_cbor() []u8
+}
+
+// Unmarshaler is the reverse: given the bytes of one CBOR data item,
+// populate the receiver. The slice is already trimmed to exactly one
+// item by the generic decoder.
+//
+// Implementers use a mut receiver:
+//
+//   pub fn (mut ip Ipv4) from_cbor(data []u8) ! { ... }
+pub interface Unmarshaler {
+mut:
+	from_cbor(data []u8) !
+}
diff --git a/vlib/encoding/cbor/raw.v b/vlib/encoding/cbor/raw.v
new file mode 100644
index 00000000000000..50710b0b3069c1
--- /dev/null
+++ b/vlib/encoding/cbor/raw.v
@@ -0,0 +1,38 @@
+module cbor
+
+// RawMessage holds the byte-exact encoding of one CBOR data item as it
+// appeared on the wire. Useful for caching/forwarding code that wants to
+// defer decoding of a nested field.
+//
+//   struct Envelope {
+//       id      int
+//       payload cbor.RawMessage // bytes preserved as-is
+//   }
+//
+//   raw := cbor.decode[cbor.RawMessage](bytes, cbor.DecodeOpts{})!
+//   back := cbor.encode(raw, cbor.EncodeOpts{})! // identical bytes
+pub struct RawMessage {
+pub mut:
+	data []u8
+}
+
+// pack_raw appends raw.data verbatim, without re-encoding; empty data is a no-op.
+@[inline]
+pub fn (mut p Packer) pack_raw(raw RawMessage) {
+	if raw.data.len == 0 {
+		return
+	}
+	p.reserve(raw.data.len)
+	unsafe { p.buf.push_many(raw.data.data, raw.data.len) }
+}
+
+// unpack_raw captures the bytes of the next value without building a
+// Value tree. Returns an owned clone, safe to outlive the unpacker.
+@[direct_array_access]
+pub fn (mut u Unpacker) unpack_raw() !RawMessage {
+	start := u.pos
+	u.skip_value()! // advances u.pos past exactly the item being captured
+	return RawMessage{
+		data: u.data[start..u.pos].clone()
+	}
+}
diff --git a/vlib/encoding/cbor/stream.v b/vlib/encoding/cbor/stream.v
new file mode 100644
index 00000000000000..5e67cf296563f6
--- /dev/null
+++ b/vlib/encoding/cbor/stream.v
@@ -0,0 +1,65 @@
+module cbor
+
+import io
+
+// Stream I/O wrappers over the standard `io.Reader` / `io.Writer`
+// interfaces. Use these for files, sockets, pipes — anywhere the
+// payload doesn't fit cleanly in a single `[]u8`.
+
+// encode_to serialises `val` into an internal buffer, then writes the
+// bytes to `w` in a loop until everything is accepted. Errors on the
+// first I/O failure, or when `w` reports a zero-byte write.
+pub fn encode_to[T](val T, mut w io.Writer, opts EncodeOpts) ! {
+	bytes := encode[T](val, opts)!
+	mut written := 0
+	for written < bytes.len {
+		n := w.write(bytes[written..])!
+		if n == 0 {
+			return error('cbor: writer stopped accepting bytes at ${written}/${bytes.len}')
+		}
+		written += n
+	}
+}
+
+// decode_from reads `r` to EOF and decodes a single top-level value. For
+// multi-value streams, use `Unpacker` directly on a pre-buffered slice.
+// With `DecodeOpts.max_stream_bytes` > 0 the call fails as soon as that
+// many bytes have been buffered, so the payload must be strictly smaller
+// than the cap. In that mode a failed read is treated like end of stream.
+// Always set `max_stream_bytes` on untrusted readers — otherwise a peer
+// that never sends EOF blocks the call forever.
+pub fn decode_from[T](mut r io.Reader, opts DecodeOpts) !T {
+	if opts.max_stream_bytes <= 0 {
+		data := io.read_all(reader: r, read_to_end_of_stream: true)!
+		return decode[T](data, opts)!
+	}
+	mut buf := []u8{cap: 4096}
+	for {
+		if buf.len >= opts.max_stream_bytes { // checked before reading, so exactly-at-cap is rejected too
+			return error('cbor: stream exceeded max_stream_bytes (${opts.max_stream_bytes})')
+		}
+		slot_cap := opts.max_stream_bytes - buf.len
+		slot_len := if slot_cap < 4096 { slot_cap } else { 4096 } // never request more than the cap allows
+		mut slot := []u8{len: slot_len}
+		n := r.read(mut slot) or { break } // any read error ends the stream
+		if n == 0 {
+			break
+		}
+		buf << slot[..n]
+	}
+	return decode[T](buf, opts)!
+}
+
+// pack_to is the streaming sibling of `encode_to`, for users who built
+// their payload manually via the `Packer` API.
+pub fn (mut p Packer) pack_to(mut w io.Writer) ! {
+	bytes := p.bytes()
+	mut written := 0
+	for written < bytes.len {
+		n := w.write(bytes[written..])!
+		if n == 0 {
+			return error('cbor: writer stopped at ${written}/${bytes.len}')
+		}
+		written += n
+	}
+}
diff --git a/vlib/encoding/cbor/tags.v b/vlib/encoding/cbor/tags.v
new file mode 100644
index 00000000000000..4f91a160a8b58b
--- /dev/null
+++ b/vlib/encoding/cbor/tags.v
@@ -0,0 +1,22 @@
+module cbor
+
+// Well-known CBOR tag numbers from the IANA registry. These are not the
+// only valid tags; users may emit any u64 tag via `Packer.pack_tag`.
+
+pub const tag_date_time = u64(0) // RFC 3339 date/time text string
+pub const tag_epoch = u64(1) // POSIX epoch seconds (int or float)
+pub const tag_unsigned_bignum = u64(2) // byte string, big-endian magnitude
+pub const tag_negative_bignum = u64(3) // byte string, -(1 + n)
+pub const tag_decimal_fraction = u64(4) // [exponent, mantissa] with base 10
+pub const tag_bigfloat = u64(5) // [exponent, mantissa] with base 2
+pub const tag_base64url_hint = u64(21)
+pub const tag_base64_hint = u64(22)
+pub const tag_base16_hint = u64(23)
+pub const tag_embedded_cbor = u64(24) // byte string holding well-formed CBOR
+pub const tag_uri = u64(32) // RFC 3986 URI as text string
+pub const tag_base64url = u64(33)
+pub const tag_base64 = u64(34)
+pub const tag_self_describe = u64(55799) // CBOR magic prefix
+
+// Magic prefix `d9d9f7` produced when wrapping any value in tag 55799.
+pub const self_describe_prefix = [u8(0xd9), 0xd9, 0xf7] diff --git a/vlib/encoding/cbor/tests/appendix_a.json b/vlib/encoding/cbor/tests/appendix_a.json new file mode 100644 index 00000000000000..40d3b56223f1a8 --- /dev/null +++ b/vlib/encoding/cbor/tests/appendix_a.json @@ -0,0 +1,636 @@ +[ + { + "cbor": "AA==", + "hex": "00", + "roundtrip": true, + "decoded": 0 + }, + { + "cbor": "AQ==", + "hex": "01", + "roundtrip": true, + "decoded": 1 + }, + { + "cbor": "Cg==", + "hex": "0a", + "roundtrip": true, + "decoded": 10 + }, + { + "cbor": "Fw==", + "hex": "17", + "roundtrip": true, + "decoded": 23 + }, + { + "cbor": "GBg=", + "hex": "1818", + "roundtrip": true, + "decoded": 24 + }, + { + "cbor": "GBk=", + "hex": "1819", + "roundtrip": true, + "decoded": 25 + }, + { + "cbor": "GGQ=", + "hex": "1864", + "roundtrip": true, + "decoded": 100 + }, + { + "cbor": "GQPo", + "hex": "1903e8", + "roundtrip": true, + "decoded": 1000 + }, + { + "cbor": "GgAPQkA=", + "hex": "1a000f4240", + "roundtrip": true, + "decoded": 1000000 + }, + { + "cbor": "GwAAAOjUpRAA", + "hex": "1b000000e8d4a51000", + "roundtrip": true, + "decoded": 1000000000000 + }, + { + "cbor": "G///////////", + "hex": "1bffffffffffffffff", + "roundtrip": true, + "decoded": 18446744073709551615 + }, + { + "cbor": "wkkBAAAAAAAAAAA=", + "hex": "c249010000000000000000", + "roundtrip": true, + "decoded": 18446744073709551616 + }, + { + "cbor": "O///////////", + "hex": "3bffffffffffffffff", + "roundtrip": true, + "decoded": -18446744073709551616 + }, + { + "cbor": "w0kBAAAAAAAAAAA=", + "hex": "c349010000000000000000", + "roundtrip": true, + "decoded": -18446744073709551617 + }, + { + "cbor": "IA==", + "hex": "20", + "roundtrip": true, + "decoded": -1 + }, + { + "cbor": "KQ==", + "hex": "29", + "roundtrip": true, + "decoded": -10 + }, + { + "cbor": "OGM=", + "hex": "3863", + "roundtrip": true, + "decoded": -100 + }, + { + "cbor": "OQPn", + "hex": "3903e7", + "roundtrip": true, + "decoded": -1000 + }, + { + "cbor": "+QAA", 
+ "hex": "f90000", + "roundtrip": true, + "decoded": 0.0 + }, + { + "cbor": "+YAA", + "hex": "f98000", + "roundtrip": true, + "decoded": -0.0 + }, + { + "cbor": "+TwA", + "hex": "f93c00", + "roundtrip": true, + "decoded": 1.0 + }, + { + "cbor": "+z/xmZmZmZma", + "hex": "fb3ff199999999999a", + "roundtrip": true, + "decoded": 1.1 + }, + { + "cbor": "+T4A", + "hex": "f93e00", + "roundtrip": true, + "decoded": 1.5 + }, + { + "cbor": "+Xv/", + "hex": "f97bff", + "roundtrip": true, + "decoded": 65504.0 + }, + { + "cbor": "+kfDUAA=", + "hex": "fa47c35000", + "roundtrip": true, + "decoded": 100000.0 + }, + { + "cbor": "+n9///8=", + "hex": "fa7f7fffff", + "roundtrip": true, + "decoded": 3.4028234663852886e+38 + }, + { + "cbor": "+3435DyIAHWc", + "hex": "fb7e37e43c8800759c", + "roundtrip": true, + "decoded": 1.0e+300 + }, + { + "cbor": "+QAB", + "hex": "f90001", + "roundtrip": true, + "decoded": 5.960464477539063e-08 + }, + { + "cbor": "+QQA", + "hex": "f90400", + "roundtrip": true, + "decoded": 6.103515625e-05 + }, + { + "cbor": "+cQA", + "hex": "f9c400", + "roundtrip": true, + "decoded": -4.0 + }, + { + "cbor": "+8AQZmZmZmZm", + "hex": "fbc010666666666666", + "roundtrip": true, + "decoded": -4.1 + }, + { + "cbor": "+XwA", + "hex": "f97c00", + "roundtrip": true, + "diagnostic": "Infinity" + }, + { + "cbor": "+X4A", + "hex": "f97e00", + "roundtrip": true, + "diagnostic": "NaN" + }, + { + "cbor": "+fwA", + "hex": "f9fc00", + "roundtrip": true, + "diagnostic": "-Infinity" + }, + { + "cbor": "+n+AAAA=", + "hex": "fa7f800000", + "roundtrip": false, + "diagnostic": "Infinity" + }, + { + "cbor": "+n/AAAA=", + "hex": "fa7fc00000", + "roundtrip": false, + "diagnostic": "NaN" + }, + { + "cbor": "+v+AAAA=", + "hex": "faff800000", + "roundtrip": false, + "diagnostic": "-Infinity" + }, + { + "cbor": "+3/wAAAAAAAA", + "hex": "fb7ff0000000000000", + "roundtrip": false, + "diagnostic": "Infinity" + }, + { + "cbor": "+3/4AAAAAAAA", + "hex": "fb7ff8000000000000", + "roundtrip": false, + 
"diagnostic": "NaN" + }, + { + "cbor": "+//wAAAAAAAA", + "hex": "fbfff0000000000000", + "roundtrip": false, + "diagnostic": "-Infinity" + }, + { + "cbor": "9A==", + "hex": "f4", + "roundtrip": true, + "decoded": false + }, + { + "cbor": "9Q==", + "hex": "f5", + "roundtrip": true, + "decoded": true + }, + { + "cbor": "9g==", + "hex": "f6", + "roundtrip": true, + "decoded": null + }, + { + "cbor": "9w==", + "hex": "f7", + "roundtrip": true, + "diagnostic": "undefined" + }, + { + "cbor": "8A==", + "hex": "f0", + "roundtrip": true, + "diagnostic": "simple(16)" + }, + { + "cbor": "+Bg=", + "hex": "f818", + "roundtrip": true, + "diagnostic": "simple(24)" + }, + { + "cbor": "+P8=", + "hex": "f8ff", + "roundtrip": true, + "diagnostic": "simple(255)" + }, + { + "cbor": "wHQyMDEzLTAzLTIxVDIwOjA0OjAwWg==", + "hex": "c074323031332d30332d32315432303a30343a30305a", + "roundtrip": true, + "diagnostic": "0(\"2013-03-21T20:04:00Z\")" + }, + { + "cbor": "wRpRS2ew", + "hex": "c11a514b67b0", + "roundtrip": true, + "diagnostic": "1(1363896240)" + }, + { + "cbor": "wftB1FLZ7CAAAA==", + "hex": "c1fb41d452d9ec200000", + "roundtrip": true, + "diagnostic": "1(1363896240.5)" + }, + { + "cbor": "10QBAgME", + "hex": "d74401020304", + "roundtrip": true, + "diagnostic": "23(h'01020304')" + }, + { + "cbor": "2BhFZElFVEY=", + "hex": "d818456449455446", + "roundtrip": true, + "diagnostic": "24(h'6449455446')" + }, + { + "cbor": "2CB2aHR0cDovL3d3dy5leGFtcGxlLmNvbQ==", + "hex": "d82076687474703a2f2f7777772e6578616d706c652e636f6d", + "roundtrip": true, + "diagnostic": "32(\"http://www.example.com\")" + }, + { + "cbor": "QA==", + "hex": "40", + "roundtrip": true, + "diagnostic": "h''" + }, + { + "cbor": "RAECAwQ=", + "hex": "4401020304", + "roundtrip": true, + "diagnostic": "h'01020304'" + }, + { + "cbor": "YA==", + "hex": "60", + "roundtrip": true, + "decoded": "" + }, + { + "cbor": "YWE=", + "hex": "6161", + "roundtrip": true, + "decoded": "a" + }, + { + "cbor": "ZElFVEY=", + "hex": "6449455446", + 
"roundtrip": true, + "decoded": "IETF" + }, + { + "cbor": "YiJc", + "hex": "62225c", + "roundtrip": true, + "decoded": "\"\\" + }, + { + "cbor": "YsO8", + "hex": "62c3bc", + "roundtrip": true, + "decoded": "ü" + }, + { + "cbor": "Y+awtA==", + "hex": "63e6b0b4", + "roundtrip": true, + "decoded": "水" + }, + { + "cbor": "ZPCQhZE=", + "hex": "64f0908591", + "roundtrip": true, + "decoded": "𐅑" + }, + { + "cbor": "gA==", + "hex": "80", + "roundtrip": true, + "decoded": [ + + ] + }, + { + "cbor": "gwECAw==", + "hex": "83010203", + "roundtrip": true, + "decoded": [ + 1, + 2, + 3 + ] + }, + { + "cbor": "gwGCAgOCBAU=", + "hex": "8301820203820405", + "roundtrip": true, + "decoded": [ + 1, + [ + 2, + 3 + ], + [ + 4, + 5 + ] + ] + }, + { + "cbor": "mBkBAgMEBQYHCAkKCwwNDg8QERITFBUWFxgYGBk=", + "hex": "98190102030405060708090a0b0c0d0e0f101112131415161718181819", + "roundtrip": true, + "decoded": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25 + ] + }, + { + "cbor": "oA==", + "hex": "a0", + "roundtrip": true, + "decoded": { + } + }, + { + "cbor": "ogECAwQ=", + "hex": "a201020304", + "roundtrip": true, + "diagnostic": "{1: 2, 3: 4}" + }, + { + "cbor": "omFhAWFiggID", + "hex": "a26161016162820203", + "roundtrip": true, + "decoded": { + "a": 1, + "b": [ + 2, + 3 + ] + } + }, + { + "cbor": "gmFhoWFiYWM=", + "hex": "826161a161626163", + "roundtrip": true, + "decoded": [ + "a", + { + "b": "c" + } + ] + }, + { + "cbor": "pWFhYUFhYmFCYWNhQ2FkYURhZWFF", + "hex": "a56161614161626142616361436164614461656145", + "roundtrip": true, + "decoded": { + "a": "A", + "b": "B", + "c": "C", + "d": "D", + "e": "E" + } + }, + { + "cbor": "X0IBAkMDBAX/", + "hex": "5f42010243030405ff", + "roundtrip": false, + "diagnostic": "(_ h'0102', h'030405')" + }, + { + "cbor": "f2VzdHJlYWRtaW5n/w==", + "hex": "7f657374726561646d696e67ff", + "roundtrip": false, + "decoded": "streaming" + }, + { + "cbor": "n/8=", + "hex": 
"9fff", + "roundtrip": false, + "decoded": [ + + ] + }, + { + "cbor": "nwGCAgOfBAX//w==", + "hex": "9f018202039f0405ffff", + "roundtrip": false, + "decoded": [ + 1, + [ + 2, + 3 + ], + [ + 4, + 5 + ] + ] + }, + { + "cbor": "nwGCAgOCBAX/", + "hex": "9f01820203820405ff", + "roundtrip": false, + "decoded": [ + 1, + [ + 2, + 3 + ], + [ + 4, + 5 + ] + ] + }, + { + "cbor": "gwGCAgOfBAX/", + "hex": "83018202039f0405ff", + "roundtrip": false, + "decoded": [ + 1, + [ + 2, + 3 + ], + [ + 4, + 5 + ] + ] + }, + { + "cbor": "gwGfAgP/ggQF", + "hex": "83019f0203ff820405", + "roundtrip": false, + "decoded": [ + 1, + [ + 2, + 3 + ], + [ + 4, + 5 + ] + ] + }, + { + "cbor": "nwECAwQFBgcICQoLDA0ODxAREhMUFRYXGBgYGf8=", + "hex": "9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff", + "roundtrip": false, + "decoded": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25 + ] + }, + { + "cbor": "v2FhAWFinwID//8=", + "hex": "bf61610161629f0203ffff", + "roundtrip": false, + "decoded": { + "a": 1, + "b": [ + 2, + 3 + ] + } + }, + { + "cbor": "gmFhv2FiYWP/", + "hex": "826161bf61626163ff", + "roundtrip": false, + "decoded": [ + "a", + { + "b": "c" + } + ] + }, + { + "cbor": "v2NGdW71Y0FtdCH/", + "hex": "bf6346756ef563416d7421ff", + "roundtrip": false, + "decoded": { + "Fun": true, + "Amt": -2 + } + } +] diff --git a/vlib/encoding/cbor/tests/canonical_test.v b/vlib/encoding/cbor/tests/canonical_test.v new file mode 100644 index 00000000000000..2b0fd17fa36481 --- /dev/null +++ b/vlib/encoding/cbor/tests/canonical_test.v @@ -0,0 +1,148 @@ +// Canonical encoding (RFC 8949 §4.2.1, "core deterministic encoding"): +// map keys are sorted by length-first lexicographic order of their +// encoded forms. Used for hashable / signable payloads. 
+module main + +import encoding.cbor +import encoding.hex + +fn h(s string) []u8 { + return hex.decode(s) or { panic('invalid hex: ${s}') } +} + +fn beq(a []u8, b []u8) bool { + if a.len != b.len { + return false + } + for i in 0 .. a.len { + if a[i] != b[i] { + return false + } + } + return true +} + +fn test_canonical_sorts_text_keys() { + // Build a map with reverse-alphabetic insertion order; canonical + // output should still emit keys "a", "b", "c", "d", "e". + v := cbor.Value(cbor.Map{ + pairs: [ + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'e' + }) + value: cbor.Value(cbor.Text{ + value: 'E' + }) + }, + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'b' + }) + value: cbor.Value(cbor.Text{ + value: 'B' + }) + }, + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'd' + }) + value: cbor.Value(cbor.Text{ + value: 'D' + }) + }, + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'a' + }) + value: cbor.Value(cbor.Text{ + value: 'A' + }) + }, + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'c' + }) + value: cbor.Value(cbor.Text{ + value: 'C' + }) + }, + ] + }) + got := cbor.encode_value(v, cbor.EncodeOpts{ canonical: true }) + want := h('a56161614161626142616361436164614461656145') + assert beq(got, want), 'canonical: got ${hex.encode(got)}, want ${hex.encode(want)}' +} + +fn test_canonical_length_first_then_lex() { + // Length-first ordering: shorter keys first. + // {"a": 1, "aa": 2} → short before long. + v := cbor.Value(cbor.Map{ + pairs: [ + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'aa' + }) + value: cbor.Value(cbor.new_uint(2)) + }, + cbor.MapPair{ + key: cbor.Value(cbor.Text{ + value: 'a' + }) + value: cbor.Value(cbor.new_uint(1)) + }, + ] + }) + got := cbor.encode_value(v, cbor.EncodeOpts{ canonical: true }) + // Encoded keys "a"=0x6161 (2 bytes), "aa"=0x626161 (3 bytes). + // Length-first: "a" first, then "aa". 
+ // Result: a2 61 61 01 62 61 61 02 + want := h('a2616101626161 02'.replace(' ', '')) + assert beq(got, want), 'length-first: got ${hex.encode(got)}' +} + +fn test_self_describe_prefix() { + bytes := cbor.encode[u64](u64(0), cbor.EncodeOpts{ self_describe: true })! + // Magic prefix: d9 d9 f7 then 0x00. + assert beq(bytes, [u8(0xd9), 0xd9, 0xf7, 0x00]) +} + +// --------------------------------------------------------------------- +// Struct-as-map canonical encoding: declaration order MUST NOT leak +// into the wire form when canonical mode is on. Otherwise hash- or +// signature-based payloads (COSE, CWT, DAG-CBOR) lose stability across +// V versions whenever a field is added or reordered in source. +// --------------------------------------------------------------------- + +struct OutOfOrder { + zeta int + alpha int + mid int +} + +fn test_canonical_struct_sorts_keys_by_encoded_form() { + v := OutOfOrder{ + zeta: 1 + alpha: 2 + mid: 3 + } + got := cbor.encode[OutOfOrder](v, cbor.EncodeOpts{ canonical: true })! + // Length-first lex on encoded keys: "mid" (4B) < "zeta" (5B) < "alpha" (6B). + // a3 636d6964 03 647a657461 01 65616c706861 02 + want := h('a3636d69640364 7a65746101 65616c706861 02'.replace(' ', '')) + assert beq(got, want), 'declaration order leaked: got ${hex.encode(got)}' +} + +fn test_canonical_struct_preserves_declaration_order_when_off() { + // Default (non-canonical) keeps source order — important for human + // inspection and matches the documented permissive behaviour. + v := OutOfOrder{ + zeta: 1 + alpha: 2 + mid: 3 + } + got := cbor.encode[OutOfOrder](v, cbor.EncodeOpts{})! 
+ // a3 647a657461 01 65616c706861 02 636d6964 03 + want := h('a3647a65746101 65616c706861 02 636d6964 03'.replace(' ', '')) + assert beq(got, want), 'non-canonical reorder: got ${hex.encode(got)}' +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt0.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt0.edn new file mode 100644 index 00000000000000..5ca0f62d4dc6cc --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt0.edn @@ -0,0 +1,61 @@ +{ + "title": "mt0", + "description": "Plain CBOR integers storable as major type 0 (mt0), from RFC 8949 appendix A", + "tests": [ + { + "description": "mt0 zero", + "encoded": h'00', + "decoded": 0, + }, + { + "description": "mt0 one", + "encoded": h'01', + "decoded": 1, + }, + { + "description": "mt0 ten, not newline", + "encoded": h'0a', + "decoded": 10, + }, + { + "description": "mt0 largest 0-byte", + "encoded": h'17', + "decoded": 23, + }, + { + "description": "mt0 smallest 1-byte", + "encoded": h'1818', + "decoded": 24, + }, + { + "description": "mt0 second 1-byte", + "encoded": h'1819', + "decoded": 25, + }, + { + "description": "mt0 100", + "encoded": h'1864', + "decoded": 100, + }, + { + "description": "mt0 1000", + "encoded": h'1903e8', + "decoded": 1000, + }, + { + "description": "mt0 1000000", + "encoded": h'1a000f4240', + "decoded": 1000000, + }, + { + "description": "mt0 1000000000000", + "encoded": h'1b000000e8d4a51000', + "decoded": 1000000000000, + }, + { + "description": "mt0 largest", + "encoded": h'1bffffffffffffffff', + "decoded": 18446744073709551615, + }, + ] +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt1.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt1.edn new file mode 100644 index 00000000000000..e223abd78a7013 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt1.edn @@ -0,0 +1,31 @@ +{ + "title": "mt1", + "description": "Plain negative CBOR integers storable as major type 1 (mt1), from RFC 8949 appendix A", + "tests": [ + { + "description": "mt1 minimum", + "encoded": 
h'3bffffffffffffffff', + "decoded": -18446744073709551616, + }, + { + "description": "mt1 maximum", + "encoded": h'20', + "decoded": -1, + }, + { + "description": "mt1 -10", + "encoded": h'29', + "decoded": -10, + } + { + "description": "mt1 -100", + "encoded": h'3863', + "decoded": -100, + } + { + "description": "mt1 -1000", + "encoded": h'3903e7', + "decoded": -1000, + } + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt2.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt2.edn new file mode 100644 index 00000000000000..ddf6075e482d18 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt2.edn @@ -0,0 +1,16 @@ +{ + "title": "mt2", + "description": "Byte strings, from RFC 8949 appendix A", + "tests": [ + { + "description": "empty", + "encoded": h'40', + "decoded": h'', + }, + { + "description": "four bytes in a byte string", + "encoded": h'4401020304', + "decoded": h'01020304', + }, + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt3.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt3.edn new file mode 100644 index 00000000000000..6d13a4dc7ebbdd --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt3.edn @@ -0,0 +1,41 @@ +{ + "title": "mt3", + "description": "UTF8 strings, from RFC 8949 appendix A", + "tests": [ + { + "description": "Empty string", + "encoded": h'60', + "decoded": "", + }, + { + "description": "Short string", + "encoded": h'6161', + "decoded": "a", + }, + { + "description": "Important string", + "encoded": h'6449455446', + "decoded": "IETF", + }, + { + "description": "Escaped string", + "encoded": h'62225c', + "decoded": "\"\\", + }, + { + "description": "String with short Unicode escape", + "encoded": h'62c3bc', + "decoded": "\u00fc", + }, + { + "description": "String with long Unicode escape", + "encoded": h'63e6b0b4', + "decoded": "\u6c34", + }, + { + "description": "String with UTF-16 Unicode escapes", + "encoded": h'64f0908591', + "decoded": "\ud800\udd51", + }, + ], +} diff --git 
a/vlib/encoding/cbor/tests/cbor_wg/appA_mt4.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt4.edn new file mode 100644 index 00000000000000..2eed4342380816 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt4.edn @@ -0,0 +1,26 @@ +{ + "title": "mt4", + "description": "Arrays, from RFC 8949 appendix A", + "tests": [ + { + "description": "Empty Array", + "encoded": h'80', + "decoded": [], + }, + { + "description": "Short Array", + "encoded": h'83010203', + "decoded": [1, 2, 3], + }, + { + "description": "Nested Arrays", + "encoded": h'8301820203820405', + "decoded": [1, [2, 3], [4, 5]], + }, + { + "description": "Longer array", + "encoded": h'98190102030405060708090a0b0c0d0e0f101112131415161718181819', + "decoded": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], + }, + ] +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt5.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt5.edn new file mode 100644 index 00000000000000..318b8ba5f78cb6 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt5.edn @@ -0,0 +1,31 @@ +{ + "title": "mt5", + "description": "Maps, from RFC 8949 appendix A", + "tests": [ + { + "description": "Empty Map", + "encoded": h'a0', + "decoded": {}, + }, + { + "description": "Map with integer keys", + "encoded": h'a201020304', + "decoded": {1: 2, 3: 4}, + }, + { + "description": "Array nested in Map with string keys", + "encoded": h'a26161016162820203', + "decoded": {"a": 1, "b": [2, 3]}, + }, + { + "description": "Map nested in array", + "encoded": h'826161a161626163', + "decoded": ["a", {"b": "c"}], + }, + { + "description": "Map with more keys", + "encoded": h'a56161614161626142616361436164614461656145', + "decoded": {"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}, + }, + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt6.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt6.edn new file mode 100644 index 00000000000000..55ce56d3d0a203 --- /dev/null +++ 
b/vlib/encoding/cbor/tests/cbor_wg/appA_mt6.edn @@ -0,0 +1,51 @@ +{ + "title": "mt6", + "description": "Tags, from RFC 8949 appendix A", + "tests": [ + { + "description": "Bigint" + "encoded": h'c249010000000000000000', + "decoded": 18446744073709551616, + }, + { + "description": "Negative Bigint" + "encoded": h'c349010000000000000000', + "decoded": -18446744073709551617, + }, + { + "description": "RFC3339 Date/Time String" + "encoded": h'c074323031332d30332d32315432303a30343a30305a', + "decoded": 0("2013-03-21T20:04:00Z"), + "encodeOptions": { + "dateTag": 0, + }, + }, + { + "description": "Date/time as integer offset from epoch", + "encoded": h'c11a514b67b0', + "decoded": 1(1363896240), + }, + { + "description": "Date/time as float offset from epoch", + "encoded": h'c1fb41d452d9ec200000', + "decoded": 1(1363896240.5), + }, + { + "description": "Expected conversion to base16", + "encoded": h'd74401020304', + "decoded": 23(h'01020304') + }, + { + "description": "Expected conversion to base16 encoding", + "encoded": h'd818456449455446', + "decoded": 24(h'6449455446'), + }, + { + # Note: RFC 8949 doesn't include trailing slash, which should probably + # be there. 
+ "description": "URL", + "encoded": h'd82077687474703a2f2f7777772e6578616d706c652e636f6d2f' + "decoded": 32("http://www.example.com/"), + }, + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt7-float.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt7-float.edn new file mode 100644 index 00000000000000..bdf2f96bdb159c --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt7-float.edn @@ -0,0 +1,131 @@ +{ + "title": "mt7-float", + "description": "Major type 7 (mt7) floats, from RFC 8949 appendix A", + "encodeOptions": { + "avoidInts": true, + }, + "tests": [ + { + "description": "Zero", + "encoded": h'f90000', + "decoded": 0.0, + }, + { + "description": "Negative zero", + "encoded": h'f98000', + "decoded": -0.0, + }, + { + "description": "One", + "encoded": h'f93c00', + "decoded": 1.0, + }, + { + "description": "1.1 does not fit into a smaller float", + "encoded": h'fb3ff199999999999a', + "decoded": 1.1, + }, + { + "description": "1.5 collapses to an f16" + "encoded": h'f93e00', + "decoded": 1.5, + }, + { + "description": "Largest f16" + "encoded": h'f97bff', + "decoded": 65504.0, + }, + { + "description": "100000" + "encoded": h'fa47c35000', + "decoded": 100000.0, + }, + { + "description": "Largest f32" + "encoded": h'fa7f7fffff', + "decoded": 3.4028234663852886e+38, + }, + { + "description": "Large round f64" + "encoded": h'fb7e37e43c8800759c', + "decoded": 1.0e+300, + }, + { + "description": "Smallest subnormal f16" + "encoded": h'f90001', + "decoded": 5.960464477539063e-8, + }, + { + "description": "Smallest normal f16" + "encoded": h'f90400', + "decoded": 0.00006103515625, + }, + { + "description": "Negative f16" + "encoded": h'f9c400', + "decoded": -4.0, + }, + { + "description": "-4.1 does not reduce" + "encoded": h'fbc010666666666666', + "decoded": -4.1, + }, + { + "description": "Positive infinity" + "encoded": h'f97c00', + "decoded": Infinity, + }, + { + "description": "Trivial NaN" + "encoded": h'f97e00', + "decoded": NaN, + }, + { + 
"description": "Negative infinity" + "encoded": h'f9fc00', + "decoded": -Infinity, + }, + { + "description": "Infinity coded as f32 instead of f16", + # Can't round-trip to non-preferred + "roundtrip": false, + "encoded": h'fa7f800000', + "decoded": Infinity, + }, + { + "description": "Trivial NaN coded as f32 instead of f16", + # Can't round-trip to non-preferred + "roundtrip": false, + "encoded": h'fa7fc00000', + "decoded": NaN, + }, + { + "description": "Negative Infinity coded as f32 instead of f16", + # Can't round-trip to non-preferred + "roundtrip": false, + "encoded": h'faff800000', + "decoded": -Infinity, + }, + { + "description": "Infinity coded as f64 instead of f16", + # Can't round-trip to non-preferred + "roundtrip": false, + "encoded": h'fb7ff0000000000000', + "decoded": Infinity, + }, + { + "description": "Trivial NaN coded as f64 instead of f16", + # Can't round-trip to non-preferred + "roundtrip": false, + "encoded": h'fb7ff8000000000000', + "decoded": NaN, + }, + { + "description": "Negative Infinity coded as f64 instead of f16", + # Can't round-trip to non-preferred + "roundtrip": false, + "encoded": h'fbfff0000000000000', + "decoded": -Infinity, + }, + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_mt7-simple.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_mt7-simple.edn new file mode 100644 index 00000000000000..4a0ea6cb8e44f5 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_mt7-simple.edn @@ -0,0 +1,36 @@ +{ + "title": "mt6-simple", + "description": "Major type 7 (mt7) simple values, from RFC 8949 appendix A", + "tests": [ + { + "description": "false", + "encoded": h'f4', + "decoded": false + }, + { + "description": "true", + "encoded": h'f5', + "decoded": true + }, + { + "description": "null", + "encoded": h'f6', + "decoded": null + }, + { + "description": "undefined", + "encoded": h'f7', + "decoded": undefined + }, + { + "description": "Unknown 0-byte simple value", + "encoded": h'f0', + "decoded": simple(16), + }, + { + 
"description": "Unknown 1-byte simple value", + "encoded": h'f8ff', + "decoded": simple(255), + }, + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/appA_streaming.edn b/vlib/encoding/cbor/tests/cbor_wg/appA_streaming.edn new file mode 100644 index 00000000000000..3c5ade6241d28c --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/appA_streaming.edn @@ -0,0 +1,72 @@ +{ + "title": "streaming", + "description": "Streaming, from RFC 8949 appendix A", + "tests": [ + { + "description": "Streamed byte string", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'5f42010243030405ff', + "decoded": (_ h'0102', h'030405'), + }, + { + "description": "Streamed UTF8 string", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'7f657374726561646d696e67ff', + "decoded": (_ "strea", "ming"), + }, + { + "description": "Streamed array", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'9fff', + "decoded": [_ ], + }, + { + "description": "Streamed array, nested non-streamed then streamed arrays", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'9f018202039f0405ffff', + "decoded": [_ 1, [2, 3], [_ 4, 5]], + }, + { + "description": "Streamed array, nested non-streamed arrays", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'9f01820203820405ff', + "decoded": [_ 1, [2, 3], [4, 5]], + }, + { + "description": "Non-Streamed array, nested non-streamed then streamed array", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'83018202039f0405ff', + "decoded": [1, [2, 3], [_ 4, 5]], + }, + { + "description": "Non-Streamed array, nested streamed then non-streamed array", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'83019f0203ff820405', + "decoded": [1, [_ 2, 3], [4, 5]], + }, + { + "description": "Longer streamed array of ints", + "roundtrip": false, # decoded version is de-streamed. 
+ "encoded": h'9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff', + "decoded": [_ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], + }, + { + "description": "Streamed map", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'bf61610161629f0203ffff', + "decoded": {_ "a": 1, "b": [_ 2, 3]}, + }, + { + "description": "Array with nested streamed map", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'826161bf61626163ff', + "decoded": ["a", {_ "b": "c"}], + }, + { + "description": "Plain streamed map", + "roundtrip": false, # decoded version is de-streamed. + "encoded": h'bf6346756ef563416d7421ff', + "decoded": {_ "Fun": true, "Amt": -2}, + }, + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/rfc8949_bad.edn b/vlib/encoding/cbor/tests/cbor_wg/rfc8949_bad.edn new file mode 100644 index 00000000000000..2f5816b68bac61 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/rfc8949_bad.edn @@ -0,0 +1,197 @@ +{ + "title": "bad", + "description": "Inputs that should fail for RFC 8949", + "fail": true, + "tests": [ + { + "description": "Missing the next byte for mt0 ai 24", + "encoded": h'18', + }, + { + "description": "Missing the next 2 bytes for mt0 ai 25", + "encoded": h'19', + }, + { + "description": "Missing the next 1 byte for mt0 ai 25", + "encoded": h'1900', + }, + { + "description": "Missing the next 4 bytes for mt0 ai 26", + "encoded": h'1a', + }, + { + "description": "Missing the next 3 bytes for mt0 ai 26", + "encoded": h'1a00', + }, + { + "description": "Missing the next 2 bytes for mt0 ai 26", + "encoded": h'1a0000', + }, + { + "description": "Missing the next byte for mt0 ai 26", + "encoded": h'1a000000', + }, + { + "description": "Missing the next 4 bytes for mt0 ai 27", + "encoded": h'1b000000', + }, + { + "description": "Invalid AI: 28", + "encoded": h'1c', + }, + { + "description": "Invalid AI: 29", + "encoded": h'1d', + }, + { + "description": "Invalid AI: 30", + 
"encoded": h'1e', + }, + { + "description": "Invalid streaming AI: 28", + "encoded": h'fc', + }, + { + "description": "Invalid streaming AI: 29", + "encoded": h'fd', + }, + { + "description": "Invalid streaming AI: 30", + "encoded": h'fe', + }, + { + "description": "bytes: Only 3 bytes, not 4", + "encoded": h'44010203', + }, + { + "description": "bytes: Indeterminate bytestring with nothing", + "encoded": h'5f', + }, + { + "description": "bytes: Indeterminate bytestring includes a non-bytes chunk", + "encoded": h'5f01ff', + }, + { + "description": "utf8: Only 3 bytes, not 4", + "encoded": h'64494554', + }, + { + "description": "utf8: Length 20 only has 4 bytes", + "encoded": h'7432303133', + }, + { + "description": "utf8: Indeterminate string includes a non-string chunk", + "encoded": h'7f01ff', + }, + { + "description": "utf8: no BREAK", + "encoded": h'7f657374726561646d696e', + }, + { + "description": "utf8: invalid utf8", + "encoded": h'62c0ae', + }, + { + "description": "array: missing item", + "encoded": h'81', + }, + { + "description": "array: missing second item", + "encoded": h'8201', + }, + { + "description": "array: nested missing item", + "encoded": h'8181818181', + }, + { + "description": "array: deeply-nested missing item", + "encoded": 
h'8181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181', + }, + { + "description": "array: invalid item", + "encoded": h'81FE', + }, + { + "description": "array: indeterminate without end", + "encoded": h'9f', + }, + { + "description": "array: indeterminate with an item, without end", + "encoded": h'9f01', + }, + { + "description": "array: streamed containing invalid", + "encoded": h'9fFEff', + }, + { + "description": "array: unexpected BREAK", + "encoded": h'91ff', + }, + { + "description": "map: expected key", + "encoded": h'a1', + }, + { + "description": "map: invalid key", + "encoded": h'a1fe01', + }, + { + "description": "map: missing value", + "encoded": h'a16161', + }, + { + "description": "map: invalid value", + "encoded": h'a16161fe', + }, + { + "description": "map: 1 key expecting 2", + "encoded": h'a20102', + }, + { + "description": "map: streaming no BREAK", + "encoded": h'bf', + }, + { + "description": "map: streaming, odd number of items", + "encoded": h'bf000103ff', + }, + { + "description": "map: 
streaming missing value", + "encoded": h'bf6161', + }, + { + "description": "map: streaming with item, missing BREAK", + "encoded": h'bf616101', + }, + { + "description": "map: streaming with invalid key", + "encoded": h'bfFE01', + }, + { + "description": "map: streaming with invalid value", + "encoded": h'bf01FE', + }, + { + "description": "map: unexpected BREAK in key", + "encoded": h'a1ff', + }, + { + "description": "map: unexpected BREAK in value", + "encoded": h'a100ff', + }, + { + "description": "unexpected BREAK", + "encoded": h'ff', + }, + { + "description": "date: unexpected object instead of offset", + "encoded": h'c1a1616100', + }, + { + "description": "date: unexpected object instead of string", + "encoded": h'c0a1616100', + }, + + + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg/rfc8949_good.edn b/vlib/encoding/cbor/tests/cbor_wg/rfc8949_good.edn new file mode 100644 index 00000000000000..6622023ae074c2 --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg/rfc8949_good.edn @@ -0,0 +1,526 @@ +{ + "title": "good", + "description": "Good tests for RFC 8949", + "tests": [ + # Integers + { + "description": "u8: non-preferred 0", + "roundtrip": false, + "encoded": h'18 00', + "decoded": 0, + }, + { + "description": "u8: max", + "encoded": h'18 ff', + "decoded": 255, + }, + { + "description": "s8: -1, not preferred", + "roundtrip": false, + "encoded": h'38 00', + "decoded": -1, + }, + { + "description": "s8: min", + "encoded": h'38 ff', + "decoded": -256, + }, + { + "description": "u16: max", + "encoded": h'19 ffff', + "decoded": 65535, + }, + { + "description": "u16: 0, non-preferred", + "roundtrip": false, + "encoded": h'19 0000', + "decoded": 0, + }, + { + "description": "s16: min", + "encoded": h'39 ffff', + "decoded": -65536, + }, + { + "description": "s16: -1, non-preferred", + "roundtrip": false, + "encoded": h'39 0000', + "decoded": -1, + }, + { + "description": "u32: max", + "encoded": h'1a ffffffff', + "decoded": 0xffffffff, + }, + { + 
"description": "u32: 0, non-preferred", + "roundtrip": false, + "encoded": h'1a 00000000', + "decoded": 0, + }, + { + "description": "u32: 65535, non-preferred", + "roundtrip": false, + "encoded": h'1a 0000ffff', + "decoded": 65535, + }, + { + "description": "s32: -65537", + "encoded": h'3a 00010000', + "decoded": -65537, + }, + { + "description": "s32: min", + "encoded": h'3a ffffffff', + "decoded": -0x100000000, + }, + { + "description": "u64: 0, not preferred", + "roundtrip": false, + "encoded": h'1b 0000000000000000', + "decoded": 0, + }, + { + "description": "u64: 65535, not preferred", + "roundtrip": false, + "encoded": h'1b 000000000000ffff', + "decoded": 65535, + }, + { + "description": "u64: 65536, not preferred", + "roundtrip": false, + "encoded": h'1b 0000000000010000', + "decoded": 65536, + }, + { + "description": "u64: MAX_SAFE_INTEGER", + "encoded": h'1b 001fffffffffffff', + "decoded": 0x1fffffffffffff, + }, + { + "description": "u64: MIN_SAFE_INTEGER", + "encoded": h'3b 001ffffffffffffe', + "decoded": -0x1fffffffffffff, + }, + { + "description": "s64: -1, not preferred", + "roundtrip": false, + "encoded": h'3b 0000000000000000', + "decoded": -1, + }, + { + "description": "s64: -65536, not preferred", + "roundtrip": false, + "encoded": h'3b 000000000000ffff', + "decoded": -65536, + }, + { + "description": "s64: -65537, not preferred", + "roundtrip": false, + "encoded": h'3b 0000000000010000', + "decoded": -65537, + }, + + # Floats + ## Around 2^53 + { + "description": "MAX_SAFE_INTEGER + 1", + "encoded": h'fa 5a000000', + "decoded": 9007199254740992.0, + }, + { + "description": "MAX_SAFE_INTEGER + 3", + "encoded": h'fb 4340000000000001', + "decoded": 9007199254740994.0, + }, + { + "description": "MIN_SAFE_INTEGER - 1", + "encoded": h'fa da000000', + "decoded": -9007199254740992.0, + }, + { + "description": "MIN_SAFE_INTEGER - 3", + "encoded": h'fb c340000000000001', + "decoded": -9007199254740994.0, + }, + ## f16 + { + "description": "f16: Min", + 
"encoded": h'f9 fbff', + "decoded": -65504.0, + "encodeOptions": { + "avoidInts": true, + }, + }, + { + "description": "f16: Second-smallest subnormal", + "encoded": h'f9 0002', + "decoded": 1.1920928955078125e-7, + }, + { + "description": "f16: Second-smallest subnormal, negative", + "encoded": h'f9 8002', + "decoded": -1.1920928955078125e-7, + }, + { + "description": "f16: Largest subnormal", + "roundtrip": false, # precision is lost with subnormal + "encoded": h'f9 03ff', + "decoded": 0.00006097555160522461, + }, + { + "description": "f16: Largest subnormal, negative", + "roundtrip": false, # precision is lost with subnormal + "encoded": h'f9 83ff', + "decoded": -0.00006097555160522461, + }, + { + "description": "f16: Closest to 1/3", + "encoded": h'f9 3555', + "decoded": 0.333251953125, + }, + { + "description": "f16: Closest to -1/3", + "encoded": h'f9 b555', + "decoded": -0.333251953125, + }, + { + "description": "f16: Largest number less than one", + "encoded": h'f9 3bff', + "decoded": 0.99951171875, + }, + { + "description": "f16: Smallest number greater than -1", + "encoded": h'f9 bbff', + "decoded": -0.99951171875, + }, + { + "description": "f16: Smallest number larger than one", + "encoded": h'f9 3c01', + "decoded": 1.0009765625, + }, + { + "description": "f16: Largest number less than -1", + "encoded": h'f9 bc01', + "decoded": -1.0009765625, + }, + { + "description": "f16: Largest odd number", + "encoded": h'f9 67ff', + "decoded": 2047.0, + "encodeOptions": { + "avoidInts": true, + }, + }, + { + "description": "f16: Smallest odd number", + "encoded": h'f9 e7ff', + "decoded": -2047.0, + "encodeOptions": { + "avoidInts": true, + }, + }, + { + "description": "f16: Closest value to pi", + "encoded": h'f9 4248', + "decoded": 3.140625, + }, + { + "description": "f16: Closest value to -pi", + "encoded": h'f9 c248', + "decoded": -3.140625, + }, + { + "description": "f16: Small positive", + "encoded": h'f9 4100', + "decoded": 2.5, + }, + { + "description": "f16: 
Small negative", + "encoded": h'f9 c100', + "decoded": -2.5, + }, + + ## f32 + { + "description": "f32: Min", + "encoded": h'fa ff7fffff', + "decoded": -3.4028234663852886e+38, + }, + { + "description": "f32: Smallest subnormal", + "encoded": h'fa 00000001', + "decoded": 1.401298464324817e-45, + }, + { + "description": "f32: Smallest subnormal, negative", + "encoded": h'fa 80000001', + "decoded": -1.401298464324817e-45, + }, + { + "description": "f32: Largest subnormal", + "encoded": h'fa 007fffff', + "decoded": 1.1754942106924411e-38, + }, + { + "description": "f32: Largest subnormal, negative", + "encoded": h'fa 807fffff', + "decoded": -1.1754942106924411e-38, + }, + { + "description": "f32: Medium subnormal", + "encoded": h'fa 00001fff', + "decoded": 1.1478035721284577e-41, + }, + { + "description": "f32: Medium subnormal, negative", + "encoded": h'fa 80001fff', + "decoded": -1.1478035721284577e-41, + }, + { + "description": "f32: smallest positive normal number", + "encoded": h'fa 00800000', + "decoded": 1.1754943508222875e-38, + }, + { + "description": "f32: largest negative normal number", + "encoded": h'fa 80800000', + "decoded": -1.1754943508222875e-38, + }, + { + "description": "f32: largest number less than one", + "encoded": h'fa 3f7fffff', + "decoded": 0.9999999403953552, + }, + { + "description": "f32: smallest number greater than -1", + "encoded": h'fa bf7fffff', + "decoded": -0.9999999403953552, + }, + { + "description": "f32: one", + "roundtrip": false, # Not preferred encoding + "encoded": h'fa 3f800000', + "decoded": 1.0, + }, + { + "description": "f32: -1", + "roundtrip": false, # Not preferred encoding + "encoded": h'fa bf800000', + "decoded": -1.0, + }, + { + "description": "f32: smallest number larger than one", + "encoded": h'fa 3f800001', + "decoded": 1.0000001192092896, + }, + { + "description": "f32: 0", + "roundtrip": false, # Not preferred encoding + "encoded": h'fa 00000000', + "decoded": 0.0, + }, + { + "description": "f32: -0", + 
"roundtrip": false, # Not preferred encoding + "encoded": h'fa 80000000', + "decoded": -0.0, + }, + { + "description": "f32: 1/3", + "encoded": h'fa 3eaaaaab', + "decoded": 0.3333333432674408, + }, + { + "description": "f32: -1/3", + "encoded": h'fa beaaaaab', + "decoded": -0.3333333432674408, + }, + { + "description": "f32: pi", + "encoded": h'fa 40490fdb', + "decoded": 3.1415927410125732, + }, + { + "description": "f32: -pi", + "encoded": h'fa c0490fdb', + "decoded": -3.1415927410125732, + }, + + + # f64 + { + "description": "f64: Max", + "encoded": h'fb 7fefffffffffffff', + "decoded": 1.7976931348623157e+308, + }, + { + "description": "f64: Min", + "encoded": h'fb ffefffffffffffff', + "decoded": -1.7976931348623157e+308, + }, + { + "description": "f64: Min subnormal", + "encoded": h'fb 0000000000000001', + "decoded": 5e-324, + }, + { + "description": "f64: Min subnormal, negative", + "encoded": h'fb 8000000000000001', + "decoded": -5e-324, + }, + { + "description": "f64: Largest subnormal", + "encoded": h'fb 000FFFFFFFFFFFFF', + "decoded": 2.225073858507201e-308, + }, + { + "description": "f64: Largest subnormal, negative", + "encoded": h'fb 800FFFFFFFFFFFFF', + "decoded": -2.225073858507201e-308, + }, + { + "description": "f64: Smallest normal", + "encoded": h'fb 0010000000000000', + "decoded": 2.2250738585072014e-308, + }, + { + "description": "f64: Smallest normal, negative", + "encoded": h'fb 8010000000000000', + "decoded": -2.2250738585072014e-308, + }, + { + "description": "f64: 1/3", + "encoded": h'fb 3FD5555555555555', + "decoded": 0.3333333333333333, + }, + { + "description": "f64: -1/3", + "encoded": h'fb BFD5555555555555', + "decoded": -0.3333333333333333, + }, + { + "description": "f64: pi", + "encoded": h'fb 400921FB54442D18', + "decoded": 3.141592653589793, + }, + { + "description": "f64: -pi", + "encoded": h'fb C00921FB54442D18', + "decoded": -3.141592653589793, + }, + + # Bigint + { + "description": "bigint: Positive", + "encoded": h'c2 49 
1c0000000000000000', + "decoded": 0x1c0000000000000000, + }, + { + "description": "bigint: Negative", + "encoded": h'c3 49 1c0000000000000000', + "decoded": -0x1c0000000000000001, + }, + + # UTF8 String + { + "description": "string: BOM", + "encoded": h'66 efbbbf424f4d', + "decoded": "\ufeffBOM", + }, + { + "description": "string: combining", + "encoded": h'63 75cc88', + "decoded": "ü", + }, + { + "description": "string: zalgo, length 1384", + "encoded": h'790568c6b8ccb7cd82cd8bccbfcd8acd98cc9bcc92cc95cc8dcc88ccbdcc8acc88cc8ccc83cda0cc84cd9bccbdcc88cc93cc87cc8ecd9fcd95ccbacc9ccc98cc9dcd9acd88cca9cca7cca4cd95cd99cd89cd96cd9cccadcc9ee1b8b3ccb7cc8ccc89cc80cd80ccbfcc83cc8dcd82cc90cc88cd90cc8ecc85cd98cc94cc86ccb2cd87ccaccca1cd9fcd8eccafccb3cca9cc9cea9cacccb4cda1cc84cd9ecc88cc83cc86cc80cc9acd8acd92cc87cd92cc94cc90cc89cc89cc91ccbecd8bcc84cc92cda0ccabcd89cca8e1b889ccb6ccbecc8ccc83cc94cd9bcd98cc8fcc8dcc8bcd86cda0cca8cca6cd9ccca9cca6ccabcd87ccbcccb0cd9fcca8cd9fcd9cccafccbccca4ccb9cd93cc9ecc97cc9fcc9fcc99cd9accb3ccb3cd9acc9fcca8c8a5ccb6ccbecc8ccc84cd82cd80cc8ccd92ccbecd82cc87cd9ecd90cd92cc88cc94cc8bcc89cc83cd97cd97cd98cc85cc90cc83cd90cc9acd91ccb1ccabcca7cd89ccbccca6cd88cca9ccb1cca8ccbccd8dccaecd8dcd87cd95e1b8a9ccb4cd8ccc8fcc86cc87cd8bcd81cc95cda0cc80cc87cc86cd9ecd98cc87cc89cc81cc84cd83cd9acd94ccbbcca5cd85cd8ecca5cc99cd9ccc9ecca6ccadccbccca8cca2cca1cd87ccb3ccaaccb3cc99ccb2cca2e1ba8accb4cd84cda0cd9dcd9bcc9acd92cc91cc92cc81cd9dcc87cd91cc8dcd9fcd96cca1cca3ccb0cc9eccaacca0cd88cd88ccaacca0ccbacca2cca7ccbbcd95ccb1ccbbccafcca5cd8dea9e8eccb7cc80cc83cc90cc91cd83cc93cd83cd92cc83cd90cda1ccbfcd9ecc95cc82cd98cc94cc88cd82cc8dcd97cca8cc9fcc9fccb2ccb1ccafccbbcd8eccb2cc96cc9dcd9ccd88ccbccd95cca8cca254ccb6cd83cc82cc94cc84cda0cc8bcc9acd97cc8ccd9dcc92cc83cc90cc8acc9bcc83cd86ccbdcc88cc94cca5cc9fccb0cd96cd8ecd8ecd94cd9fcd89cd94cc9ecca7cca7ccaecd89cd89cd94ccbacca9cd87cd88cd87cd8dcd89cd9accabcd93ccbae1b985ccb4cd81cc94cd9dcc92cd82cc95ccbfcc93cc8ccc95cd98cd82ccbdcc95cd83cc86cc9bcc94cc8dcd90cda0cd9ccd95c
d87ccbccc9ecc9ccd96ccb3cd85ccb3cd8ecca2ccadccbbcd93ccb1e1bab0ccb8cd97cc95cc9acd8bcc82cc88cc86cd84cc94cd92cc81cc88cc8acc8ecc91cc94cd8accbecd8bcc85ccbecd98cc85cc8acc94cc8bcca5ccb9cca6ccafcd87cd9fcd9acca5cd89cd8dcd9fcd93ccafccbbcd87cca2ccb1ccadccb2cd89ccadcca4cd8dcca1cca3cc9cccb3cab2ccb8cd86cc90cc89cc83cc91ccacccbacc99cca6cc96ccbbcca8ccbbcca2ca89ccb4cd86cc8bcd86cd9ecd90cc81cd81cd8acc9bcda0cc84cc90cd9ecd8accbecca9cd96cd93ccb9ccb3cd8dccbccd89ccafcd8ecca5cd94cd85cc9dcca6cd8ecd93cca2c6bdccb7cd84cd86cc83cc95cd90cc92cc98cd93cc9dcca8cca4ccbcccafcd85ccafcd8dcd94cc9dcd87ccaccd9accaecca5ccb1ccbcccb1cca0ccadcc98cca3cc9dccbbcd9fccbccc97e1bbbcccb6cc94cd80cd92cd80cda0cc8fcc81cd9ecc92cd83cd92cd84cc9bcd92cd8ccd86cc8dcc94cc84cda1cd91cd83cd84cc8dcd97cda0cd80cca2cd88cd93cd8dcd93cca1cca4ccafcca1cca4ccaccca0ccb9cd9acc9ecc99ccbaccb1cca2cc9ccd95cca4cc99cca9ccb0cd8eccaee1b9bdccb7cc95cd92cc9acc81cd91cc90cda0cc97cca5cca9cd94cd87cca3cd99ccb1cc9ecca7cca6ccb2ccbbcc96cc9fccb3cc99ccaecca4cca2cca5cca3e1b8b8ccb6cc8ecd81cda1cc83cd91cd97cd8dcc9fcd88cc98ccb1ccb1cd8dcc9fcca9ccb3cd8eccbcccabcca0cd85cd8dcc96cd9acca0cd93cd9fcca4cca9cd8dccb9cc9fcca5ccbbcd9fc7b0ccb5cc83cd8cccbecc8acd8bcd89cd85ccaacd89ccb1cca0cd88cca2ccaccd9ccd8dccadccaaccb9cca6ccb1cca0cca1cd88', + "decoded": "Ƹ̷̧̛͕̺̜̘̝͚͈̩̤͕͙͉͖̭̞͂͋̿͊̒̍̈̽̊̈̌̃̄͛̽̈̓̇̎͘̕͟͜͠ḳ̷̡̲͇̬͎̯̳̩̜̌̉̀̀̿̃̍͂̐̈͐̎̅̔̆͘͟Ꜭ̴̨̫͉̄̈̃̆̀͊͒̇͒̔̐̉̉̑̾͋̄̒̚͡͞͠ḉ̶̨̨̨̦̩̦̫͇̼̰̯̼̤̹͓̞̗̟̟̙͚̳̳͚̟̾̌̃̔͛̏̍̋͆͘͜͟͟͜͠ȥ̶̧̨̱̫͉̼̦͈̩̱̼͍̮͍͇͕̾̌̄͂̀̌͒̾͂̇͐͒̈̔̋̉̃͗͗̅̐̃͐͑͘̚͞ḩ̴̨̢̡̢͚͔̻̥͎̥̙̞̦̭̼͇̳̪̳̙̲͌̏̆̇͋́̀̇̆̇̉́̄̓̕͘͜͠͞ͅẊ̴̡̢̧͖̣̰̞̪̠͈͈̪̠̺̻͕̱̻̯̥͍̈́͛͒̑̒́̇͑̍̚͟͠͝͝ꞎ̷̨̨̢̟̟̲̱̯̻͎̲̖̝͈̼͕̀̃̐̑̓̓̓͒̃͐̿̂̔̈͂̍͗̕͘͜͡͞Ţ̶̧̛̥̟̰͖͎͎͔͉͔̞̮͉͉͔̺̩͇͈͇͍͉͚̫͓̺̓̂̔̄̋͗̌̒̃̐̊̃͆̽̈̔̚͟͠͝ṅ̴̢̛͕͇̼̞̜͖̳̳͎̭̻͓̱́̔̒͂̿̓̌͂̽̓̆̔̍͐̕̕͘̕͜͝͠ͅḀ̸̢̡̹̦̯͇͚̥͉͍͓̯̻͇̱̭̲͉̭̤͍̣̜̳̆̀͗͋̂̈̆̈́̔͒́̈̊̎̑̔͊̾͋̅̾̅̊̔̋̕̚͘͟͟ʲ̸̨̢̬̺̙̦̖̻̻͆̐̉̃̑ʉ̴̢̛̩͖͓̹̳͍̼͉̯͎̥͔̝̦͎͓͆̋͆͐́́͊̄̐͊̾͞͠͞ͅƽ̷̨̘͓̝̤̼̯̯͍͔̝͇̬͚̮̥̱̼̱̠̭̘̣̝̻̼̗̈́͆̃͐̒̕͟ͅỼ̶̢̡̡̢̛͈͓͍͓̤̯̤̬̠̹͚̞̙̺̱̜͕̤̙̩̰͎̮̔̀͒̀̏́̒̓͒̈́͒͌͆̍̔̄͑̓̈́̍͗̀͠͞͡͠ṽ̷̧̢̗̥̩͔͇̣͙̱̞̦̲̻̖̟̳̙̮̤̥̣͒́͑̐̕̚͠Ḹ̶͍̟͈̘̱̱͍̟̩̳͎̼̫̠͍̖͚̠͓̤̩͍̹̟̥̻̎́̃͑͗͟͟͡ͅǰ̵̢̡͉̪͉̱̠͈̬͍̭̪̹̦̱̠͈̃͌̾̊͋͜ͅ", + }, + + # Date + { + "description": "Date: 0 
epoch", + "encoded": h'c100', + "decoded": 1(0), + }, + { + "description": "Date: 1 epoch", + "encoded": h'c101', + "decoded": 1(1), + }, + { + "description": "Date: -1 epoch", + "encoded": h'c120', + "decoded": 1(-1), + }, + + # TODO: Large tag numbers, which have been reserved by IANA + + # Map + { + "description": "Map: -0 key", + "roundtrip": false, # JS Map converts -0 to 0 in map key + "encoded": h'a1f9800080', + "decoded": { + -0.0: [], + }, + }, + { + "description": "Map: interesting keys", + "roundtrip": false, + "encoded": h'b81a808081008081808081810080f580f480f680f7800080613080fb3fb999999999999a8001802080f97c0080f9fc0080f97e0080c2491c000000000000000080a080a1808080a1a08080a1a18080808040804100806080616180c10080', + "decoded": { + []: [], + [0]: [], + [[]]: [], + [[0]]: [], + true: [], + false: [], + null: [], + undefined: [], + 0: [], + "0": [], + 0.1: [], + 1: [], + -1: [], + Infinity: [], + -Infinity: [], + NaN: [], + 0x1c0000000000000000: [], + {}: [], + {[]: []}: [], + {{}: []}: [], + {{[]: []}: []}: [], + h'': [], + h'00': [], + "": [], + "a": [], + 1(0): [] + }, + }, + { + "description": "array: deeply-nested", + "encoded": 
h'8181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818181818100', + "decoded": 
[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[0]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]], + }, + { + "description": "map: deeply-nested key", + "encoded": 
h'a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000', + "decoded": {{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{0: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 
0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}: 0}, + }, + { + "description": "map: deeply-nested value", + "encoded": h'a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a10
0a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a100a10000', + "decoded": {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: 
{0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: {0: 0}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} + }, + + # JS edge cases + { + "description": "JS: __proto__ should be escaped (security)", + "encoded": h'a1695f5f70726f746f5f5f00', + "decoded": {"__proto__": 0}, + } + ], +} diff --git a/vlib/encoding/cbor/tests/cbor_wg_test.v b/vlib/encoding/cbor/tests/cbor_wg_test.v new file mode 100644 index 00000000000000..8a591819ccd9cd --- /dev/null +++ b/vlib/encoding/cbor/tests/cbor_wg_test.v @@ -0,0 +1,152 @@ +// CBOR Working Group conformance corpus +// (https://github.com/cbor-wg/cbor-test-vectors). +// +// Two fixture files live next to this test: +// +// * `cbor_wg/rfc8949_good.edn` — 88 well-formed payloads. Each must +// decode without error. +// * `cbor_wg/rfc8949_bad.edn` — 47 malformed payloads. 
Each MUST be +// rejected by the decoder per RFC 8949 §3. +// +// The fixtures are EDN (CBOR Diagnostic Notation, RFC 8610) — we only +// pull the `"encoded": h'…'` hex literals because that's what we need +// to drive the decoder. The expected `decoded` value is left to the +// other test files (rfc8949_appendix_a, upstream_appendix_a) which use +// the JSON-encoded corpus. +module main + +import encoding.cbor +import encoding.hex +import os + +const wg_dir = os.join_path(os.dir(@FILE), 'cbor_wg') + +// extract_hex_literals pulls every `"encoded": h'…'` value out of an EDN +// file. The hex string can contain whitespace (visual grouping per +// RFC 8610) — we strip it before decoding. +fn extract_hex_literals(text string) []string { + mut out := []string{} + mut i := 0 + needle := '"encoded": h\'' + for { + idx := text.index_after(needle, i) or { break } + start := idx + needle.len + end := text.index_after("'", start) or { break } + raw := text[start..end] + mut clean := []u8{cap: raw.len} + for c in raw { + if c == ` ` || c == `\t` || c == `\n` || c == `\r` { + continue + } + clean << c + } + out << clean.bytestr() + i = end + 1 + } + return out +} + +fn test_extractor_sanity() { + good := os.read_file(os.join_path(wg_dir, 'rfc8949_good.edn')) or { + panic('cannot read good.edn: ${err}') + } + bad := os.read_file(os.join_path(wg_dir, 'rfc8949_bad.edn')) or { + panic('cannot read bad.edn: ${err}') + } + good_hexes := extract_hex_literals(good) + bad_hexes := extract_hex_literals(bad) + assert good_hexes.len == 88, 'good corpus drift: ${good_hexes.len} (expected 88)' + assert bad_hexes.len == 47, 'bad corpus drift: ${bad_hexes.len} (expected 47)' +} + +fn test_cbor_wg_good_corpus() { + text := os.read_file(os.join_path(wg_dir, 'rfc8949_good.edn'))! + hexes := extract_hex_literals(text) + // The corpus deliberately stresses 256+ deep nesting; raise the cap. 
+ opts := cbor.DecodeOpts{ + max_depth: 4096 + } + mut failures := []string{} + for hex_str in hexes { + bytes := hex.decode(hex_str) or { + failures << '${hex_str}: hex decode: ${err}' + continue + } + cbor.decode[cbor.Value](bytes, opts) or { + failures << '${hex_str}: ${err}' + continue + } + } + if failures.len > 0 { + for f in failures { + eprintln('GOOD-FAIL: ${f}') + } + assert false, '${failures.len}/${hexes.len} good vectors rejected' + } +} + +// Per-major-type files (mt0..mt7) and the streaming/indefinite suite all +// hold well-formed entries. Each must decode without error. +const mt_files = [ + 'appA_mt0.edn', + 'appA_mt1.edn', + 'appA_mt2.edn', + 'appA_mt3.edn', + 'appA_mt4.edn', + 'appA_mt5.edn', + 'appA_mt6.edn', + 'appA_mt7-float.edn', + 'appA_mt7-simple.edn', + 'appA_streaming.edn', +] + +fn test_cbor_wg_per_major_type_corpus() { + mut total := 0 + mut failures := []string{} + for fname in mt_files { + text := os.read_file(os.join_path(wg_dir, fname)) or { + panic('cannot read ${fname}: ${err}') + } + hexes := extract_hex_literals(text) + assert hexes.len > 0, '${fname} has no entries' + for hex_str in hexes { + total++ + bytes := hex.decode(hex_str) or { + failures << '${fname} ${hex_str}: hex: ${err}' + continue + } + cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{}) or { + failures << '${fname} ${hex_str}: ${err}' + continue + } + } + } + if failures.len > 0 { + for f in failures { + eprintln('MT-FAIL: ${f}') + } + assert false, '${failures.len}/${total} per-major-type vectors rejected' + } + assert total >= 80, 'corpus too small: ${total}' +} + +fn test_cbor_wg_bad_corpus() { + text := os.read_file(os.join_path(wg_dir, 'rfc8949_bad.edn'))! + hexes := extract_hex_literals(text) + mut accepted_anyway := []string{} + for hex_str in hexes { + bytes := hex.decode(hex_str) or { + // Malformed at the hex layer is still a rejection; skip. 
+ continue + } + if v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{}) { + accepted_anyway << '${hex_str} → ${v.type_name()}' + } + } + if accepted_anyway.len > 0 { + for a in accepted_anyway { + eprintln('BAD-ACCEPTED: ${a}') + } + assert false, '${accepted_anyway.len}/${hexes.len} malformed vectors were not rejected' + } +} diff --git a/vlib/encoding/cbor/tests/cose_cwt_test.v b/vlib/encoding/cbor/tests/cose_cwt_test.v new file mode 100644 index 00000000000000..17298b7a952867 --- /dev/null +++ b/vlib/encoding/cbor/tests/cose_cwt_test.v @@ -0,0 +1,258 @@ +// Real-world conformance: drives CBOR-Web-Token (RFC 8392) and COSE +// (RFC 8152) sample structures. These are the canonical CBOR payloads +// used by IoT auth, OAuth 2.0 PoP, and EAT (RFC 9711). They exercise +// canonical encoding, signed/MAC/COSE_Sign1 structures, and standard +// claim-set integer keys. +module main + +import encoding.cbor +import encoding.hex + +fn h_(s string) []u8 { + return hex.decode(s) or { panic('bad hex ${s}') } +} + +fn b_eq(a []u8, b []u8) bool { + if a.len != b.len { + return false + } + for i in 0 .. 
a.len { + if a[i] != b[i] { + return false + } + } + return true +} + +// -------------------------------------------------------------------- +// RFC 8392 §A.1 — Example CWT Claims Set +// +// { +// 1: "coap://as.example.com", // iss +// 2: "erikw", // sub +// 3: "coap://light.example.com",// aud +// 4: 1444064944, // exp +// 5: 1443944944, // nbf +// 6: 1443944944, // iat +// 7: h'0b71' // cti +// } +const a1_claims_hex = 'a70175636f61703a2f2f61732e6578616d706c652e636f6d02656572696b77' + + '037818636f61703a2f2f6c696768742e6578616d706c652e636f6d041a5612ae' + + 'b0051a5610d9f0061a5610d9f007420b71' + +fn test_rfc8392_a1_cwt_claims_set() { + v := cbor.decode[cbor.Value](h_(a1_claims_hex), cbor.DecodeOpts{}) or { + panic('decode CWT claims: ${err}') + } + if v !is cbor.Map { + assert false, 'CWT claim set must decode to a Map' + return + } + m := v as cbor.Map + assert m.pairs.len == 7 + + // Helper: find pair by integer key. + mut by_key := map[u64]cbor.Value{} + for pair in m.pairs { + key := pair.key + if key !is cbor.IntNum { + assert false, 'CWT claim key must be an integer (got ${key.type_name()})' + return + } + k := key as cbor.IntNum + assert !k.negative + by_key[k.magnitude] = pair.value + } + + // iss / sub / aud / exp / nbf / iat / cti + iss := by_key[1] or { + assert false, 'missing iss' + return + } + assert (iss as cbor.Text).value == 'coap://as.example.com' + sub := by_key[2] or { + assert false, 'missing sub' + return + } + assert (sub as cbor.Text).value == 'erikw' + aud := by_key[3] or { + assert false, 'missing aud' + return + } + assert (aud as cbor.Text).value == 'coap://light.example.com' + exp := by_key[4] or { + assert false, 'missing exp' + return + } + exp_int := exp as cbor.IntNum + assert !exp_int.negative && exp_int.magnitude == 1444064944 + nbf := by_key[5] or { + assert false, 'missing nbf' + return + } + nbf_int := nbf as cbor.IntNum + assert !nbf_int.negative && nbf_int.magnitude == 1443944944 + iat := by_key[6] or { + assert false, 
'missing iat' + return + } + iat_int := iat as cbor.IntNum + assert !iat_int.negative && iat_int.magnitude == 1443944944 + cti := by_key[7] or { + assert false, 'missing cti' + return + } + cti_bs := cti as cbor.Bytes + assert b_eq(cti_bs.data, [u8(0x0b), 0x71]) + + // Re-encode through the Value tree: must be byte-identical (RFC 8392 + // claim sets are already in canonical form per §7). + out := cbor.encode_value(v, cbor.EncodeOpts{}) + assert b_eq(out, h_(a1_claims_hex)), 'CWT round-trip mismatch' +} + +// -------------------------------------------------------------------- +// RFC 8392 §A.4 (MACed CWT example; §A.3 is the signed one — NOTE(review): confirm) — COSE_Mac0-tagged CWT (tag 17 = CBOR_Tag_COSE_Mac0) +// The outer is `61(...)` = tag 61 (CWT) wrapping a COSE_Mac0 (tag 17). +// We don't validate the MAC — we only check that the CBOR structure parses cleanly, +// the protected header is a bstr, the claim payload is a bstr containing +// the §A.1 claims, and the tag is correctly identified. +// Tag 61 (CWT) → tag 17 (COSE_Mac0) → [protected={1:5}, {}, payload, mac_tag] +// Constructed from the §A.1 claims + the §A.4 example MAC tag (names below keep the a3_ prefix). +const a3_mac_hex = 'd83d' + // CWT tag + 'd1' + // COSE_Mac0 tag + '84' + // array(4) + '43a10105' + // bstr(3): {1:5} (alg 5 = HMAC 256/256; NOTE(review): HMAC 256/64 is alg 4 — confirm which was intended) + 'a0' + // {} + '5850' + a1_claims_hex + // bstr(80): claims + '48093101ef6d789200' // bstr(8): MAC + +fn test_rfc8392_a3_cose_mac0_cwt() { + v := cbor.decode[cbor.Value](h_(a3_mac_hex), cbor.DecodeOpts{}) or { + panic('decode CWT-Mac0: ${err}') + } + // Outer is tag 61 (CWT). + if v !is cbor.Tag { + assert false, 'expected tag 61' + return + } + cwt_tag := v as cbor.Tag + assert cwt_tag.number == 61, 'outer tag = ${cwt_tag.number}, want 61' + + // Inner is tag 17 (COSE_Mac0). 
+ inner := cwt_tag.content() + if inner !is cbor.Tag { + assert false, 'expected tag 17 inside CWT' + return + } + mac0 := inner as cbor.Tag + assert mac0.number == 17, 'inner tag = ${mac0.number}, want 17' + + // COSE_Mac0 = [protected, unprotected, payload, tag] + body := mac0.content() + if body !is cbor.Array { + assert false, 'COSE_Mac0 must be array' + return + } + arr := body as cbor.Array + assert arr.elements.len == 4, 'COSE_Mac0 must have 4 elements, got ${arr.elements.len}' + + // protected is a bstr wrapping a CBOR-encoded map. + protected_bs := arr.elements[0] as cbor.Bytes + protected_map := cbor.decode[cbor.Value](protected_bs.data, cbor.DecodeOpts{}) or { + panic('decode protected header: ${err}') + } + assert protected_map is cbor.Map + + // unprotected is an empty map. + assert arr.elements[1] is cbor.Map + assert (arr.elements[1] as cbor.Map).pairs.len == 0 + + // payload is a bstr that decodes to the §A.1 claims set. + payload_bs := arr.elements[2] as cbor.Bytes + claims := cbor.decode[cbor.Value](payload_bs.data, cbor.DecodeOpts{}) or { + panic('decode payload: ${err}') + } + assert claims is cbor.Map + claims_map := claims as cbor.Map + assert claims_map.pairs.len == 7, 'expected 7 claims, got ${claims_map.pairs.len}' + + // Round-trip: re-encode the entire structure and compare bytes. + out := cbor.encode_value(v, cbor.EncodeOpts{}) + assert b_eq(out, h_(a3_mac_hex)), 'CWT-Mac0 round-trip mismatch' +} + +// -------------------------------------------------------------------- +// RFC 8152 §C.2.1 — COSE_Sign1 single-signer ECDSA example +// 18([h'a201260300', {}, h'546869732069732074686520636f6e74656e742e', +// h'6520bbaf2081d7e0ed0f95f76eb0733d667005f7467cec4b87b9381a6ba1ed' + +// 'e8e00df29f32a37230f39a842a54821fdd223092819d7728efb9d3a0080b75']) +// +// The fully-encoded form is published in the working group test +// vectors. 
We encode it from its components to validate that the +// pieces round-trip — that's the meaningful interop check (the actual +// signature isn't verified). +fn test_cose_sign1_structure() { + mut p := cbor.new_packer(cbor.EncodeOpts{}) + p.pack_tag(18) // COSE_Sign1 + p.pack_array_header(4) + // protected (bstr containing {1: -7}); −7 = ECDSA w/ SHA-256 + p.pack_bytes(h_('a10126')) + // unprotected (empty map) + p.pack_map_header(0) + // payload + p.pack_bytes('This is the content.'.bytes()) + // signature (64-byte example value, not truncated; it is never verified here) + p.pack_bytes(h_('8eb33e4ca31d1c465ab05aac34cc6b23d58fef5c083106c4d25a91aef0b0117e2af9a291aa32e14ab834dc56ed2a223444547e01f11d3b0916e5a4c345cacb36')) + + out := p.bytes() + + // Decode back and validate the COSE_Sign1 shape. + v := cbor.decode[cbor.Value](out, cbor.DecodeOpts{}) or { panic('decode Sign1: ${err}') } + tag := v as cbor.Tag + assert tag.number == 18, 'tag = ${tag.number}, want 18 (COSE_Sign1)' + + body := tag.content() as cbor.Array + assert body.elements.len == 4 + + protected := body.elements[0] as cbor.Bytes + hdr := cbor.decode[cbor.Value](protected.data, cbor.DecodeOpts{}) or { + panic('decode protected: ${err}') + } + hdr_map := hdr as cbor.Map + assert hdr_map.pairs.len == 1 + // alg label = 1, value = -7 + alg_key := hdr_map.pairs[0].key as cbor.IntNum + assert !alg_key.negative && alg_key.magnitude == 1 + alg_val := hdr_map.pairs[0].value as cbor.IntNum + assert alg_val.negative && alg_val.magnitude == 6 // -7 = -1 - 6 + + payload := body.elements[2] as cbor.Bytes + assert payload.data.bytestr() == 'This is the content.' + + // Round-trip the whole thing through the Value tree. 
+ rt := cbor.encode_value(v, cbor.EncodeOpts{}) + assert b_eq(rt, out), 'COSE_Sign1 round-trip mismatch' +} + +// -------------------------------------------------------------------- +// RFC 8152 §3 — Sig_structure used as Signature Input +// Sig_structure = [context, body_protected, external_aad, payload] +// This covers canonical encoding requirements (§4.4): when computing +// the to-be-signed bytes, the structure MUST be deterministically encoded. +fn test_sig_structure_canonical() { + mut p := cbor.new_packer(cbor.EncodeOpts{ canonical: true }) + p.pack_array_header(4) + p.pack_text('Signature1') + p.pack_bytes(h_('a10126')) + p.pack_bytes([]u8{}) // external_aad + p.pack_bytes('payload bytes'.bytes()) + encoded := p.bytes() + + // Re-encode in canonical mode through the Value tree must be identical. + v := cbor.decode[cbor.Value](encoded, cbor.DecodeOpts{}) or { panic(err) } + mut p2 := cbor.new_packer(cbor.EncodeOpts{ canonical: true }) + p2.pack_value(v) + rt := p2.bytes() + assert b_eq(rt, encoded), 'Sig_structure canonical round-trip differs' +} diff --git a/vlib/encoding/cbor/tests/generic_test.v b/vlib/encoding/cbor/tests/generic_test.v new file mode 100644 index 00000000000000..fc7067f0a85ca9 --- /dev/null +++ b/vlib/encoding/cbor/tests/generic_test.v @@ -0,0 +1,284 @@ +// Generic encode[T]/decode[T] coverage. Exercises every supported V +// type family — primitives, arrays, maps, structs (with attributes), +// optional fields, enums, RawMessage, Marshaler/Unmarshaler — and +// asserts byte-exact output for at least one case per family so we +// catch silent encoding drift. +module main + +import encoding.cbor +import encoding.hex + +fn h(s string) []u8 { + return hex.decode(s) or { panic('invalid hex: ${s}') } +} + +fn beq(a []u8, b []u8) bool { + if a.len != b.len { + return false + } + for i in 0 .. 
a.len { + if a[i] != b[i] { + return false + } + } + return true +} + +// --------------------------------------------------------------------- +// Primitive round-trips +// --------------------------------------------------------------------- + +fn test_round_trip_primitives() { + bytes_bool := cbor.encode[bool](true, cbor.EncodeOpts{})! + assert cbor.decode[bool](bytes_bool, cbor.DecodeOpts{})! == true + + bytes_i32 := cbor.encode[i32](-42, cbor.EncodeOpts{})! + assert cbor.decode[i32](bytes_i32, cbor.DecodeOpts{})! == -42 + + bytes_u64 := cbor.encode[u64](u64(0x1234_5678_9abc_def0), cbor.EncodeOpts{})! + assert cbor.decode[u64](bytes_u64, cbor.DecodeOpts{})! == 0x1234_5678_9abc_def0 + + bytes_f64 := cbor.encode[f64](3.141592653589793, cbor.EncodeOpts{})! + assert cbor.decode[f64](bytes_f64, cbor.DecodeOpts{})! == 3.141592653589793 + + bytes_str := cbor.encode[string]('hello, 世界', cbor.EncodeOpts{})! + assert cbor.decode[string](bytes_str, cbor.DecodeOpts{})! == 'hello, 世界' +} + +// --------------------------------------------------------------------- +// Arrays and maps +// --------------------------------------------------------------------- + +fn test_array_round_trip() { + src := [1, 2, 3, 4, 5] + bytes := cbor.encode[[]int](src, cbor.EncodeOpts{})! + got := cbor.decode[[]int](bytes, cbor.DecodeOpts{})! + assert got == src +} + +fn test_map_round_trip() { + src := { + 'a': 1 + 'b': 2 + 'c': 3 + } + bytes := cbor.encode[map[string]int](src, cbor.EncodeOpts{})! + got := cbor.decode[map[string]int](bytes, cbor.DecodeOpts{})! + for k, v in src { + assert got[k] == v + } + assert got.len == src.len +} + +fn test_nested_array_map() { + src := [ + { + 'k': 'v1' + }, + { + 'k': 'v2' + }, + ] + bytes := cbor.encode[[]map[string]string](src, cbor.EncodeOpts{})! + got := cbor.decode[[]map[string]string](bytes, cbor.DecodeOpts{})! 
+ assert got.len == 2 + assert got[0]['k'] == 'v1' + assert got[1]['k'] == 'v2' +} + +// --------------------------------------------------------------------- +// Structs — attributes, optional, rename strategies +// --------------------------------------------------------------------- + +struct Person { + name string + age int +} + +fn test_struct_basic() { + p := Person{ + name: 'Alice' + age: 42 + } + bytes := cbor.encode[Person](p, cbor.EncodeOpts{})! + got := cbor.decode[Person](bytes, cbor.DecodeOpts{})! + assert got.name == 'Alice' + assert got.age == 42 +} + +struct WithAttrs { + user_id string @[cbor: 'uid'] + password string @[skip] + internal string @[cbor: '-'] + keep string +} + +fn test_struct_attributes() { + p := WithAttrs{ + user_id: 'u-1' + password: 'secret' + internal: 'hidden' + keep: 'visible' + } + bytes := cbor.encode[WithAttrs](p, cbor.EncodeOpts{})! + // Decode generically to inspect structure. + v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{})! + assert v is cbor.Map + if v is cbor.Map { + mut keys := []string{} + for pair in v.pairs { + if pair.key is cbor.Text { + keys << pair.key.value + } + } + assert 'uid' in keys + assert 'keep' in keys + assert 'password' !in keys + assert 'internal' !in keys + } + got := cbor.decode[WithAttrs](bytes, cbor.DecodeOpts{})! + assert got.user_id == 'u-1' + assert got.keep == 'visible' +} + +struct WithOption { + name string + tag ?string +} + +fn test_struct_option_field() { + none_p := WithOption{ + name: 'a' + tag: none + } + bytes := cbor.encode[WithOption](none_p, cbor.EncodeOpts{})! + got := cbor.decode[WithOption](bytes, cbor.DecodeOpts{})! + assert got.name == 'a' + assert got.tag == none + + some_p := WithOption{ + name: 'b' + tag: ?string('hot') + } + bytes2 := cbor.encode[WithOption](some_p, cbor.EncodeOpts{})! + got2 := cbor.decode[WithOption](bytes2, cbor.DecodeOpts{})! 
+ assert got2.name == 'b' + assert got2.tag != none + assert got2.tag or { '' } == 'hot' +} + +@[cbor_rename_all: 'kebab-case'] +struct WithRename { + user_name string + user_age int +} + +fn test_struct_rename_all() { + p := WithRename{ + user_name: 'Bob' + user_age: 30 + } + bytes := cbor.encode[WithRename](p, cbor.EncodeOpts{})! + v := cbor.decode[cbor.Value](bytes, cbor.DecodeOpts{})! + if v is cbor.Map { + mut keys := []string{} + for pair in v.pairs { + if pair.key is cbor.Text { + keys << pair.key.value + } + } + assert 'user-name' in keys + assert 'user-age' in keys + } + // And the rename round-trips. + got := cbor.decode[WithRename](bytes, cbor.DecodeOpts{})! + assert got.user_name == 'Bob' + assert got.user_age == 30 +} + +// --------------------------------------------------------------------- +// Enum +// --------------------------------------------------------------------- + +enum Color { + red + green + blue +} + +fn test_enum_round_trip() { + bytes := cbor.encode[Color](Color.green, cbor.EncodeOpts{})! + got := cbor.decode[Color](bytes, cbor.DecodeOpts{})! + assert got == Color.green +} + +// --------------------------------------------------------------------- +// RawMessage — preserves bytes byte-for-byte +// --------------------------------------------------------------------- + +fn test_raw_message_round_trip() { + original := h('a26161016162820203') + raw := cbor.decode[cbor.RawMessage](original, cbor.DecodeOpts{})! + again := cbor.encode[cbor.RawMessage](raw, cbor.EncodeOpts{})! 
+ assert beq(again, original) +} + +// --------------------------------------------------------------------- +// Marshaler / Unmarshaler — user-controlled wire format +// --------------------------------------------------------------------- + +struct Ipv4 { +mut: + octets [4]u8 +} + +pub fn (ip Ipv4) to_cbor() []u8 { + mut p := cbor.new_packer(cbor.EncodeOpts{ initial_cap: 8 }) + p.pack_bytes([ip.octets[0], ip.octets[1], ip.octets[2], ip.octets[3]]) + return p.bytes().clone() +} + +pub fn (mut ip Ipv4) from_cbor(data []u8) ! { + mut u := cbor.new_unpacker(data, cbor.DecodeOpts{}) + bytes := u.unpack_bytes()! + if bytes.len != 4 { + return error('Ipv4: expected 4 bytes, got ${bytes.len}') + } + ip.octets[0] = bytes[0] + ip.octets[1] = bytes[1] + ip.octets[2] = bytes[2] + ip.octets[3] = bytes[3] +} + +fn test_marshaler_round_trip() { + ip := Ipv4{ + octets: [u8(192), 168, 1, 1]! + } + bytes := cbor.encode[Ipv4](ip, cbor.EncodeOpts{})! + // Wire bytes: 0x44 (bytes len 4) followed by 4 octets. + assert beq(bytes, h('44c0a80101')) + got := cbor.decode[Ipv4](bytes, cbor.DecodeOpts{})! + assert got.octets[0] == 192 + assert got.octets[1] == 168 + assert got.octets[2] == 1 + assert got.octets[3] == 1 +} + +// --------------------------------------------------------------------- +// Integer-range checks on decode +// --------------------------------------------------------------------- + +fn test_int_range_overflow_rejected() { + // 256 doesn't fit u8. + bytes := cbor.encode[u16](u16(256), cbor.EncodeOpts{})! + if _ := cbor.decode[u8](bytes, cbor.DecodeOpts{}) { + assert false, 'expected u8 range error' + } +} + +fn test_negative_to_unsigned_rejected() { + bytes := cbor.encode[i64](-1, cbor.EncodeOpts{})! 
+ if _ := cbor.decode[u64](bytes, cbor.DecodeOpts{}) { + assert false, 'expected u64 range error' + } +} diff --git a/vlib/encoding/cbor/tests/rfc8949_appendix_a_test.v b/vlib/encoding/cbor/tests/rfc8949_appendix_a_test.v new file mode 100644 index 00000000000000..507539798e7c7c --- /dev/null +++ b/vlib/encoding/cbor/tests/rfc8949_appendix_a_test.v @@ -0,0 +1,781 @@ +// RFC 8949 Appendix A conformance tests. Each entry is taken verbatim +// from the published `cbor/test-vectors` list. Roundtrip-true entries +// are exercised in both directions (encode-and-compare-bytes plus +// decode-and-compare-value); roundtrip-false entries are decode-only, +// which matches the test-vector flag. +// +// Hex bytes are kept as string literals so the diffs against the RFC +// are obvious when reading the file. +module main + +import encoding.cbor +import encoding.hex +import math + +fn h(s string) []u8 { + return hex.decode(s) or { panic('invalid hex: ${s}') } +} + +fn bytes_eq(a []u8, b []u8) bool { + if a.len != b.len { + return false + } + for i in 0 .. 
a.len { + if a[i] != b[i] { + return false + } + } + return true +} + +fn assert_encode_uint(v u64, hex_expected string) { + got := cbor.encode[u64](v, cbor.EncodeOpts{}) or { panic(err) } + expected := h(hex_expected) + if !bytes_eq(got, expected) { + panic('encode u64 ${v}: got ${hex.encode(got)}, want ${hex_expected}') + } +} + +fn assert_encode_int(v i64, hex_expected string) { + got := cbor.encode[i64](v, cbor.EncodeOpts{}) or { panic(err) } + expected := h(hex_expected) + if !bytes_eq(got, expected) { + panic('encode i64 ${v}: got ${hex.encode(got)}, want ${hex_expected}') + } +} + +// --------------------------------------------------------------------- +// Unsigned integers (major type 0) +// --------------------------------------------------------------------- + +fn test_unsigned_zero() { + assert_encode_uint(0, '00') +} + +fn test_unsigned_small() { + assert_encode_uint(1, '01') + assert_encode_uint(10, '0a') + assert_encode_uint(23, '17') +} + +fn test_unsigned_one_byte() { + assert_encode_uint(24, '1818') + assert_encode_uint(25, '1819') + assert_encode_uint(100, '1864') + assert_encode_uint(0xff, '18ff') +} + +fn test_unsigned_two_bytes() { + assert_encode_uint(1000, '1903e8') + assert_encode_uint(0xffff, '19ffff') +} + +fn test_unsigned_four_bytes() { + assert_encode_uint(1_000_000, '1a000f4240') + assert_encode_uint(0xffffffff, '1affffffff') +} + +fn test_unsigned_eight_bytes() { + assert_encode_uint(1_000_000_000_000, '1b000000e8d4a51000') + assert_encode_uint(u64(0xffffffffffffffff), '1bffffffffffffffff') +} + +// --------------------------------------------------------------------- +// Negative integers (major type 1) +// --------------------------------------------------------------------- + +fn test_negative_small() { + assert_encode_int(-1, '20') + assert_encode_int(-10, '29') +} + +fn test_negative_one_byte() { + assert_encode_int(-100, '3863') +} + +fn test_negative_two_bytes() { + assert_encode_int(-1000, '3903e7') +} + +fn 
test_negative_extreme() { + // -2^63 still fits i64. + assert_encode_int(-9_223_372_036_854_775_808, '3b7fffffffffffffff') +} + +fn test_negative_lower_bound_via_packer() { + // -2^64 (lower bound of CBOR negative ints) requires the full u64 + // argument and can't be represented as an i64. + mut p := cbor.new_packer(cbor.EncodeOpts{}) + p.pack_negative_arg(u64(0xffffffffffffffff)) + assert bytes_eq(p.bytes(), h('3bffffffffffffffff')) +} + +// --------------------------------------------------------------------- +// Floats — preferred serialisation (RFC 8949 §4.2.2) +// --------------------------------------------------------------------- + +fn assert_encode_float(v f64, hex_expected string) { + got := cbor.encode[f64](v, cbor.EncodeOpts{}) or { panic(err) } + expected := h(hex_expected) + if !bytes_eq(got, expected) { + panic('encode f64 ${v}: got ${hex.encode(got)}, want ${hex_expected}') + } +} + +fn test_float_zero() { + assert_encode_float(0.0, 'f90000') + // -0.0 — distinct bit pattern from +0.0. + neg_zero := math.f64_from_bits(u64(0x8000000000000000)) + assert_encode_float(neg_zero, 'f98000') +} + +fn test_float_simple_values() { + assert_encode_float(1.0, 'f93c00') + assert_encode_float(1.5, 'f93e00') + assert_encode_float(65504.0, 'f97bff') + assert_encode_float(-4.0, 'f9c400') +} + +fn test_float_subnormal_half() { + // 2^-24 — smallest positive half subnormal (preserved exactly in f32/f64). + assert_encode_float(5.960464477539063e-08, 'f90001') + // 2^-14 — smallest positive normal half. 
+ assert_encode_float(6.103515625e-05, 'f90400') +} + +fn test_float_single() { + assert_encode_float(100000.0, 'fa47c35000') + assert_encode_float(3.4028234663852886e+38, 'fa7f7fffff') +} + +fn test_float_double() { + assert_encode_float(1.1, 'fb3ff199999999999a') + assert_encode_float(1.0e+300, 'fb7e37e43c8800759c') + assert_encode_float(-4.1, 'fbc010666666666666') +} + +fn test_float_special_values() { + assert_encode_float(math.inf(1), 'f97c00') + assert_encode_float(math.inf(-1), 'f9fc00') + // NaN — encoder collapses to the canonical quiet NaN. + got := cbor.encode[f64](math.nan(), cbor.EncodeOpts{}) or { panic(err) } + assert bytes_eq(got, h('f97e00')), 'NaN encoded as ${hex.encode(got)}, want f97e00' +} + +// --------------------------------------------------------------------- +// Booleans, null, undefined +// --------------------------------------------------------------------- + +fn test_bool_false() { + got := cbor.encode[bool](false, cbor.EncodeOpts{}) or { panic(err) } + assert bytes_eq(got, [u8(0xf4)]) +} + +fn test_bool_true() { + got := cbor.encode[bool](true, cbor.EncodeOpts{}) or { panic(err) } + assert bytes_eq(got, [u8(0xf5)]) +} + +fn test_null_via_packer() { + mut p := cbor.new_packer(cbor.EncodeOpts{}) + p.pack_null() + assert bytes_eq(p.bytes(), [u8(0xf6)]) +} + +fn test_undefined_via_packer() { + mut p := cbor.new_packer(cbor.EncodeOpts{}) + p.pack_undefined() + assert bytes_eq(p.bytes(), [u8(0xf7)]) +} + +fn test_simple_values() { + mut p := cbor.new_packer(cbor.EncodeOpts{}) + p.pack_simple(16) or { panic(err) } + assert bytes_eq(p.bytes(), h('f0')) + + mut p3 := cbor.new_packer(cbor.EncodeOpts{}) + p3.pack_simple(255) or { panic(err) } + assert bytes_eq(p3.bytes(), h('f8ff')) +} + +// RFC 8949 §3.3: encoder MUST NOT issue two-byte sequences starting +// with 0xf8 and continuing with a byte < 0x20. We refuse to emit such a +// value, and the decoder rejects them on input. 
+fn test_simple_values_rfc8949_strictness() {
+	// Simple values 24..31 must be refused on both the encode and decode side.
+	// Encoding side: pack_simple(24..31) returns an error.
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	if _ := p.pack_simple(24) {
+		assert false, 'expected pack_simple(24) to fail'
+	}
+	if _ := p.pack_simple(31) {
+		assert false, 'expected pack_simple(31) to fail'
+	}
+	// Decoder side: f818 (simple(24) two-byte form) is malformed.
+	cbor.decode[cbor.Value](h('f818'), cbor.DecodeOpts{}) or {
+		assert err.msg().contains('1-byte form'), 'unexpected: ${err.msg()}'
+		return
+	}
+	assert false, 'expected decoder to reject simple(24) two-byte form'
+}
+
+// ---------------------------------------------------------------------
+// Byte and text strings
+// ---------------------------------------------------------------------
+
+// An empty byte string encodes as the single header byte 0x40.
+fn test_empty_byte_string() {
+	got := cbor.encode[[]u8]([]u8{}, cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('40'))
+}
+
+// A 4-byte byte string encodes as header 0x44 followed by the payload.
+fn test_byte_string_4() {
+	got := cbor.encode[[]u8]([u8(0x01), 0x02, 0x03, 0x04], cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('4401020304'))
+}
+
+// An empty text string encodes as the single header byte 0x60.
+fn test_empty_text_string() {
+	got := cbor.encode[string]('', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('60'))
+}
+
+// One ASCII character: header 0x61 plus the byte for 'a'.
+fn test_text_a() {
+	got := cbor.encode[string]('a', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('6161'))
+}
+
+// Four ASCII characters: header 0x64 plus the bytes of 'IETF'.
+fn test_text_ietf() {
+	got := cbor.encode[string]('IETF', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('6449455446'))
+}
+
+// A double quote and a backslash are emitted as raw bytes (22 5c).
+fn test_text_escaped() {
+	got := cbor.encode[string]('"\\', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('62225c'))
+}
+
+// The length in the header counts UTF-8 bytes: 'ü' occupies two.
+fn test_text_utf8_2byte() {
+	got := cbor.encode[string]('ü', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('62c3bc'))
+}
+
+// Three-byte UTF-8 sequence.
+fn test_text_utf8_3byte() {
+	got := cbor.encode[string]('水', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('63e6b0b4'))
+}
+
+// Four-byte UTF-8 sequence.
+fn test_text_utf8_4byte() {
+	got := cbor.encode[string]('𐅑', cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('64f0908591'))
+}
+
+// ---------------------------------------------------------------------
+// Arrays
+// ---------------------------------------------------------------------
+
+// An empty array encodes as the single header byte 0x80.
+fn test_empty_array() {
+	got := cbor.encode[[]int]([]int{}, cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('80'))
+}
+
+// [1, 2, 3] encodes as header 0x83 plus three one-byte integers.
+fn test_array_3() {
+	got := cbor.encode[[]int]([1, 2, 3], cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('83010203'))
+}
+
+// [1, [2, 3], [4, 5]] built as a Value tree and encoded with encode_value.
+fn test_array_nested_via_value() {
+	v := cbor.Value(cbor.Array{
+		elements: [
+			cbor.Value(cbor.new_uint(1)),
+			cbor.Value(cbor.Array{
+				elements: [cbor.Value(cbor.new_uint(2)), cbor.Value(cbor.new_uint(3))]
+			}),
+			cbor.Value(cbor.Array{
+				elements: [cbor.Value(cbor.new_uint(4)), cbor.Value(cbor.new_uint(5))]
+			}),
+		]
+	})
+	got := cbor.encode_value(v, cbor.EncodeOpts{})
+	assert bytes_eq(got, h('8301820203820405'))
+}
+
+// 25 elements no longer fit the inline length, so the header becomes 98 19;
+// elements 24 and 25 likewise need the two-byte form (18 18, 18 19).
+fn test_array_25_items() {
+	mut elements := []u64{cap: 25}
+	for i in 0 .. 25 {
+		elements << u64(i + 1)
+	}
+	got := cbor.encode[[]u64](elements, cbor.EncodeOpts{}) or { panic(err) }
+	want := '98190102030405060708090a0b0c0d0e0f101112131415161718181819'
+	assert bytes_eq(got, h(want))
+}
+
+// ---------------------------------------------------------------------
+// Maps
+// ---------------------------------------------------------------------
+
+// An empty map encodes as the single header byte 0xa0.
+fn test_empty_map() {
+	got := cbor.encode[map[string]int](map[string]int{}, cbor.EncodeOpts{}) or { panic(err) }
+	assert bytes_eq(got, h('a0'))
+}
+
+// {1: 2, 3: 4} written through the Packer: map header, then key/value pairs.
+fn test_int_key_map_via_packer() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_map_header(2)
+	p.pack_int(1)
+	p.pack_int(2)
+	p.pack_int(3)
+	p.pack_int(4)
+	assert bytes_eq(p.bytes(), h('a201020304'))
+}
+
+// {"a": 1, "b": [2, 3]} built as a Value tree.
+fn test_string_key_map_with_array_value() {
+	v := cbor.Value(cbor.Map{
+		pairs: [
+			cbor.MapPair{
+				key:   cbor.Value(cbor.Text{
+					value: 'a'
+				})
+				value: cbor.Value(cbor.new_uint(1))
+			},
+			cbor.MapPair{
+				key:   cbor.Value(cbor.Text{
+					value: 'b'
+				})
+				value: cbor.Value(cbor.Array{
+					elements: [cbor.Value(cbor.new_uint(2)), cbor.Value(cbor.new_uint(3))]
+				})
+			},
+		]
+	})
+	got := cbor.encode_value(v, cbor.EncodeOpts{})
+	assert bytes_eq(got, h('a26161016162820203'))
+}
+
+// ["a", {"b": "c"}] built as a Value tree.
+fn test_array_with_map_inside() {
+	v := cbor.Value(cbor.Array{
+		elements: [
+			cbor.Value(cbor.Text{
+				value: 'a'
+			}),
+			cbor.Value(cbor.Map{
+				pairs: [
+					cbor.MapPair{
+						key:   cbor.Value(cbor.Text{
+							value: 'b'
+						})
+						value: cbor.Value(cbor.Text{
+							value: 'c'
+						})
+					},
+				]
+			}),
+		]
+	})
+	got := cbor.encode_value(v, cbor.EncodeOpts{})
+	assert bytes_eq(got, h('826161a161626163'))
+}
+
+// ---------------------------------------------------------------------
+// Tags
+// ---------------------------------------------------------------------
+
+// Tag 0 wrapping a date/time text string.
+fn test_tag_date_time() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_tag(0)
+	p.pack_text('2013-03-21T20:04:00Z')
+	assert bytes_eq(p.bytes(), h('c074323031332d30332d32315432303a30343a30305a'))
+}
+
+// Tag 1 wrapping an integer epoch timestamp.
+fn test_tag_epoch_int() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_tag(1)
+	p.pack_int(1363896240)
+	assert bytes_eq(p.bytes(), h('c11a514b67b0'))
+}
+
+// Tag 1 wrapping a float epoch timestamp; the expected bytes use the 8-byte
+// float form (fb).
+fn test_tag_epoch_float() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_tag(1)
+	p.pack_float64(1363896240.5)
+	assert bytes_eq(p.bytes(), h('c1fb41d452d9ec200000'))
+}
+
+// Tag 32 needs the one-byte tag-number form (d8 20).
+fn test_tag_uri() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_tag(32)
+	p.pack_text('http://www.example.com')
+	assert bytes_eq(p.bytes(), h('d82076687474703a2f2f7777772e6578616d706c652e636f6d'))
+}
+
+fn test_tag_unsigned_bignum() {
+	// Tag 2 + 9-byte big-endian magnitude for 18446744073709551616 = 2^64.
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_tag(2)
+	p.pack_bytes([u8(0x01), 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])
+	assert bytes_eq(p.bytes(), h('c249010000000000000000'))
+}
+
+// ---------------------------------------------------------------------
+// Indefinite-length items
+// ---------------------------------------------------------------------
+
+// Indefinite byte string: 5f, two definite chunks, then the break byte ff.
+fn test_indefinite_byte_string() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_bytes_indef()
+	p.pack_bytes([u8(0x01), 0x02])
+	p.pack_bytes([u8(0x03), 0x04, 0x05])
+	p.pack_break()
+	assert bytes_eq(p.bytes(), h('5f42010243030405ff'))
+}
+
+// Indefinite text string made of the chunks "strea" and "ming".
+fn test_indefinite_text_string() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_text_indef()
+	p.pack_text('strea')
+	p.pack_text('ming')
+	p.pack_break()
+	assert bytes_eq(p.bytes(), h('7f657374726561646d696e67ff'))
+}
+
+// Empty indefinite array: opener 9f immediately followed by break.
+fn test_indefinite_array() {
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_array_indef()
+	p.pack_break()
+	assert bytes_eq(p.bytes(), h('9fff'))
+}
+
+fn test_indefinite_array_with_definite_inside() {
+	// [_ 1, [2, 3], [_ 4, 5]]
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_array_indef()
+	p.pack_int(1)
+	p.pack_array_header(2)
+	p.pack_int(2)
+	p.pack_int(3)
+	p.pack_array_indef()
+	p.pack_int(4)
+	p.pack_int(5)
+	// One break per open indefinite array: inner first, then outer.
+	p.pack_break()
+	p.pack_break()
+	assert bytes_eq(p.bytes(), h('9f018202039f0405ffff'))
+}
+
+fn test_indefinite_map() {
+	// {_ "a": 1, "b": [_ 2, 3]}
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_map_indef()
+	p.pack_text('a')
+	p.pack_int(1)
+	p.pack_text('b')
+	p.pack_array_indef()
+	p.pack_int(2)
+	p.pack_int(3)
+	// First break closes the inner array, second closes the map.
+	p.pack_break()
+	p.pack_break()
+	assert bytes_eq(p.bytes(), h('bf61610161629f0203ffff'))
+}
+
+// ---------------------------------------------------------------------
+// Decode side: every Appendix A vector decodes to the right value
+// ---------------------------------------------------------------------
+
+// UintCase pairs a hex-encoded CBOR input with the u64 it must decode to.
+struct UintCase {
+	hex string
+	val u64
+}
+
+// IntCase pairs a hex-encoded CBOR input with the i64 it must decode to.
+struct IntCase {
+	hex string
+	val i64
+}
+
+// Unsigned integers at every argument width, up to u64 max.
+fn test_decode_unsigned() {
+	for pair in [
+		UintCase{
+			hex: '00'
+			val: 0
+		},
+		UintCase{
+			hex: '17'
+			val: 23
+		},
+		UintCase{
+			hex: '1818'
+			val: 24
+		},
+		UintCase{
+			hex: '1864'
+			val: 100
+		},
+		UintCase{
+			hex: '1903e8'
+			val: 1000
+		},
+		UintCase{
+			hex: '1a000f4240'
+			val: 1_000_000
+		},
+		UintCase{
+			hex: '1b000000e8d4a51000'
+			val: 1_000_000_000_000
+		},
+		UintCase{
+			hex: '1bffffffffffffffff'
+			val: u64(0xffffffffffffffff)
+		},
+	] {
+		v := cbor.decode[u64](h(pair.hex), cbor.DecodeOpts{}) or {
+			panic('decode ${pair.hex}: ${err}')
+		}
+		assert v == pair.val, 'decoded ${pair.hex}: got ${v}, want ${pair.val}'
+	}
+}
+
+// Negative integers; the last case (argument 0x7fff_ffff_ffff_ffff) is
+// -1 - (2^63 - 1) = i64 min.
+fn test_decode_negative() {
+	for pair in [
+		IntCase{
+			hex: '20'
+			val: -1
+		},
+		IntCase{
+			hex: '29'
+			val: -10
+		},
+		IntCase{
+			hex: '3863'
+			val: -100
+		},
+		IntCase{
+			hex: '3903e7'
+			val: -1000
+		},
+		IntCase{
+			hex: '3b7fffffffffffffff'
+			val: i64(-9_223_372_036_854_775_808)
+		},
+	] {
+		v := cbor.decode[i64](h(pair.hex), cbor.DecodeOpts{}) or {
+			panic('decode ${pair.hex}: ${err}')
+		}
+		assert v == pair.val, 'decoded ${pair.hex}: got ${v}, want ${pair.val}'
+	}
+}
+
+// Floats at all three wire widths, including subnormal, infinities and NaN.
+fn test_decode_floats() {
+	// Half precision.
+	v := cbor.decode[f64](h('f93c00'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v == 1.0
+	v2 := cbor.decode[f64](h('f93e00'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v2 == 1.5
+	v3 := cbor.decode[f64](h('f97bff'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v3 == 65504.0
+	// Subnormal half.
+	v4 := cbor.decode[f64](h('f90001'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v4 == 5.960464477539063e-08, 'subnormal half: ${v4}'
+	// f32 / f64.
+	v5 := cbor.decode[f64](h('fa47c35000'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v5 == 100000.0
+	v6 := cbor.decode[f64](h('fb3ff199999999999a'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v6 == 1.1
+	// Inf / -Inf.
+	v7 := cbor.decode[f64](h('f97c00'), cbor.DecodeOpts{}) or { panic(err) }
+	assert math.is_inf(v7, 1)
+	v8 := cbor.decode[f64](h('f9fc00'), cbor.DecodeOpts{}) or { panic(err) }
+	assert math.is_inf(v8, -1)
+	// NaN.
+	v9 := cbor.decode[f64](h('f97e00'), cbor.DecodeOpts{}) or { panic(err) }
+	assert math.is_nan(v9)
+	// Alternate non-canonical encodings (roundtrip=false but must decode).
+	for hex_str in ['fa7f800000', 'fb7ff0000000000000'] {
+		val := cbor.decode[f64](h(hex_str), cbor.DecodeOpts{}) or { panic(err) }
+		assert math.is_inf(val, 1), '${hex_str} → ${val}'
+	}
+	for hex_str in ['faff800000', 'fbfff0000000000000'] {
+		val := cbor.decode[f64](h(hex_str), cbor.DecodeOpts{}) or { panic(err) }
+		assert math.is_inf(val, -1), '${hex_str} → ${val}'
+	}
+	for hex_str in ['fa7fc00000', 'fb7ff8000000000000'] {
+		val := cbor.decode[f64](h(hex_str), cbor.DecodeOpts{}) or { panic(err) }
+		assert math.is_nan(val), '${hex_str} → ${val}'
+	}
+}
+
+// false / true decode to bool; null and undefined are checked via Value.
+fn test_decode_bool_null() {
+	bf := cbor.decode[bool](h('f4'), cbor.DecodeOpts{}) or { panic(err) }
+	assert bf == false
+	bt := cbor.decode[bool](h('f5'), cbor.DecodeOpts{}) or { panic(err) }
+	assert bt == true
+	v := cbor.decode[cbor.Value](h('f6'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v.is_nil()
+	u := cbor.decode[cbor.Value](h('f7'), cbor.DecodeOpts{}) or { panic(err) }
+	assert u.is_undefined()
+}
+
+// Simple values outside the Bool/Null/Undefined set decode to cbor.Simple.
+fn test_decode_simple_extended() {
+	// Inline form for simple(16).
+	v0 := cbor.decode[cbor.Value](h('f0'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v0 is cbor.Simple
+	if v0 is cbor.Simple {
+		assert v0.value == 16
+	}
+	// 1-byte form for simple(32) — first valid two-byte simple value.
+	mut p := cbor.new_packer(cbor.EncodeOpts{})
+	p.pack_simple(32) or { panic(err) }
+	v32 := cbor.decode[cbor.Value](p.bytes(), cbor.DecodeOpts{}) or { panic(err) }
+	assert v32 is cbor.Simple
+	if v32 is cbor.Simple {
+		assert v32.value == 32
+	}
+	// Top of the range.
+	v2 := cbor.decode[cbor.Value](h('f8ff'), cbor.DecodeOpts{}) or { panic(err) }
+	// Assert the variant first: without this, a non-Simple result would skip
+	// the value check below and the case would pass vacuously.
+	assert v2 is cbor.Simple
+	if v2 is cbor.Simple {
+		assert v2.value == 255
+	}
+}
+
+// Text strings, from empty through 4-byte UTF-8 sequences.
+fn test_decode_strings() {
+	a := cbor.decode[string](h('60'), cbor.DecodeOpts{}) or { panic(err) }
+	assert a == ''
+	b := cbor.decode[string](h('6161'), cbor.DecodeOpts{}) or { panic(err) }
+	assert b == 'a'
+	c := cbor.decode[string](h('6449455446'), cbor.DecodeOpts{}) or { panic(err) }
+	assert c == 'IETF'
+	d := cbor.decode[string](h('62c3bc'), cbor.DecodeOpts{}) or { panic(err) }
+	assert d == 'ü'
+	e := cbor.decode[string](h('63e6b0b4'), cbor.DecodeOpts{}) or { panic(err) }
+	assert e == '水'
+	f := cbor.decode[string](h('64f0908591'), cbor.DecodeOpts{}) or { panic(err) }
+	assert f == '𐅑'
+}
+
+// A definite-length array decodes into []int.
+fn test_decode_array() {
+	v := cbor.decode[[]int](h('83010203'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v == [1, 2, 3]
+}
+
+// Despite the name this is a negative test: the vector maps "b" to an array,
+// so decoding it into map[string]int must fail.
+fn test_decode_map() {
+	v := cbor.decode[map[string]int](h('a26161016162820203'), cbor.DecodeOpts{}) or {
+		// This vector has b → [2,3], not int — so this decode should fail.
+		assert err.msg().contains('mismatch') || err.msg().contains('overflow')
+		return
+	}
+	_ = v
+	assert false, 'expected type mismatch on map[string]int decode of array value'
+}
+
+// Chunks of an indefinite-length text string are concatenated on decode.
+fn test_decode_indefinite_text() {
+	// "strea" + "ming" (65 73 74 72 65 61, then 64 6d 69 6e 67)
+	v := cbor.decode[string](h('7f657374726561646d696e67ff'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v == 'streaming'
+}
+
+// Indefinite-length arrays, empty and with 25 elements, decode into []int.
+fn test_decode_indefinite_array() {
+	v := cbor.decode[[]int](h('9fff'), cbor.DecodeOpts{}) or { panic(err) }
+	assert v == []int{}
+	v2 := cbor.decode[[]int](h('9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff'),
+		cbor.DecodeOpts{}) or { panic(err) }
+	mut want := []int{cap: 25}
+	for i in 0 .. 25 {
+		want << i + 1
+	}
+	assert v2 == want
+}
+
+// Chunks of an indefinite-length byte string are concatenated on decode.
+fn test_decode_indefinite_bytes() {
+	v := cbor.decode[[]u8](h('5f42010243030405ff'), cbor.DecodeOpts{}) or { panic(err) }
+	assert bytes_eq(v, [u8(0x01), 0x02, 0x03, 0x04, 0x05])
+}
+
+// ---------------------------------------------------------------------
+// Round-trip for every roundtrip=true vector via the Value tree
+// ---------------------------------------------------------------------
+
+// roundtrip_vectors lists hex inputs that must re-encode byte-for-byte after
+// being decoded into a Value.
+const roundtrip_vectors = [
+	'00',
+	'01',
+	'0a',
+	'17',
+	'1818',
+	'1819',
+	'1864',
+	'1903e8',
+	'1a000f4240',
+	'1b000000e8d4a51000',
+	'1bffffffffffffffff',
+	'3bffffffffffffffff',
+	'20',
+	'29',
+	'3863',
+	'3903e7',
+	'f90000',
+	'f98000',
+	'f93c00',
+	'fb3ff199999999999a',
+	'f93e00',
+	'f97bff',
+	'fa47c35000',
+	'fa7f7fffff',
+	'fb7e37e43c8800759c',
+	'f90001',
+	'f90400',
+	'f9c400',
+	'fbc010666666666666',
+	'f97c00',
+	'f97e00',
+	'f9fc00',
+	'f4',
+	'f5',
+	'f6',
+	'f7',
+	'f0',
+	'f8ff',
+	'40',
+	'4401020304',
+	'60',
+	'6161',
+	'6449455446',
+	'62225c',
+	'62c3bc',
+	'63e6b0b4',
+	'64f0908591',
+	'80',
+	'83010203',
+	'8301820203820405',
+	'98190102030405060708090a0b0c0d0e0f101112131415161718181819',
+	'a0',
+	'a201020304',
+	'a26161016162820203',
+	'826161a161626163',
+	'a56161614161626142616361436164614461656145',
+]
+
+// Decodes each vector into a Value, re-encodes it with default options and
+// requires the output to equal the input bytes.
+fn test_value_roundtrip_all_canonical_vectors() {
+	for hex_str in roundtrip_vectors {
+		input := h(hex_str)
+		decoded := cbor.decode[cbor.Value](input, cbor.DecodeOpts{}) or {
+			panic('decode ${hex_str}: ${err}')
+		}
+		got := cbor.encode_value(decoded, cbor.EncodeOpts{})
+		assert bytes_eq(got, input), 'roundtrip ${hex_str}: got ${hex.encode(got)}'
+	}
+}
+
+// Tag-bearing vectors round-trip via the Value tree as well.
+fn test_value_roundtrip_tag_vectors() {
+	// Each vector is decoded into a Value and must re-encode to the same bytes.
+	for hex_str in [
+		'c074323031332d30332d32315432303a30343a30305a',
+		'c11a514b67b0',
+		'c1fb41d452d9ec200000',
+		'd74401020304',
+		'd818456449455446',
+		'd82076687474703a2f2f7777772e6578616d706c652e636f6d',
+		'c249010000000000000000',
+		'c349010000000000000000',
+	] {
+		input := h(hex_str)
+		decoded := cbor.decode[cbor.Value](input, cbor.DecodeOpts{}) or {
+			panic('decode ${hex_str}: ${err}')
+		}
+		got := cbor.encode_value(decoded, cbor.EncodeOpts{})
+		assert bytes_eq(got, input), 'tag roundtrip ${hex_str}: got ${hex.encode(got)}'
+	}
+}
diff --git a/vlib/encoding/cbor/tests/security_test.v b/vlib/encoding/cbor/tests/security_test.v
new file mode 100644
index 00000000000000..39c49617fd02f0
--- /dev/null
+++ b/vlib/encoding/cbor/tests/security_test.v
@@ -0,0 +1,250 @@
+// Tests that the decoder is safe against adversarial input: malformed
+// initial bytes, premature EOF, depth bombs, indefinite-length nesting,
+// and invalid UTF-8.
+module main
+
+import encoding.cbor
+import encoding.hex
+
+// h decodes a hex literal into bytes and panics on malformed hex, so a typo
+// in a test vector fails loudly.
+fn h(s string) []u8 {
+	return hex.decode(s) or { panic('invalid hex: ${s}') }
+}
+
+// ---------------------------------------------------------------------
+// EOF handling
+// ---------------------------------------------------------------------
+
+fn test_eof_truncated_uint() {
+	// 0x18 = uint(1-byte arg), but no following byte.
+	if _ := cbor.decode[u64](h('18'), cbor.DecodeOpts{}) {
+		assert false, 'expected EOF error'
+	}
+}
+
+fn test_eof_truncated_array() {
+	// 0x83 = array of 3, but only 1 element follows.
+	if _ := cbor.decode[[]int](h('8301'), cbor.DecodeOpts{}) {
+		assert false, 'expected EOF error in array'
+	}
+}
+
+fn test_eof_truncated_string() {
+	// 0x65 = text len 5, but only 2 bytes (61 62) follow.
+	if _ := cbor.decode[string](h('656162'), cbor.DecodeOpts{}) {
+		assert false, 'expected EOF error in text'
+	}
+}
+
+// ---------------------------------------------------------------------
+// Reserved additional info
+// ---------------------------------------------------------------------
+
+fn test_reserved_info_rejected() {
+	// 0x1c = major 0, info 28 (reserved).
+	if _ := cbor.decode[cbor.Value](h('1c'), cbor.DecodeOpts{}) {
+		assert false, 'expected malformed for info 28'
+	}
+}
+
+// ---------------------------------------------------------------------
+// Depth bomb
+// ---------------------------------------------------------------------
+
+fn test_depth_bomb_rejected() {
+	// Build an indefinite-length array nested 1000 deep:
+	// 1000 openers + 1000 breaks = 2000 bytes.
+	mut deep := []u8{cap: 2000}
+	for _ in 0 .. 1000 {
+		deep << 0x9f // start indefinite array
+	}
+	for _ in 0 .. 1000 {
+		deep << 0xff // close
+	}
+	if _ := cbor.decode[cbor.Value](deep, cbor.DecodeOpts{ max_depth: 16 }) {
+		assert false, 'expected MaxDepthError'
+	}
+}
+
+// ---------------------------------------------------------------------
+// Indefinite-length string with mismatched chunk
+// ---------------------------------------------------------------------
+
+fn test_indef_text_with_byte_chunk_rejected() {
+	// 7f = indef text, 42 01 02 = bytes(2) chunk, ff = break.
+	// The byte-string chunk is the only defect in this payload, so the test
+	// fails if cross-type chunks are ever accepted; trailing stray items
+	// would be rejected for an unrelated reason and mask that regression.
+	if _ := cbor.decode[string](h('7f420102ff'), cbor.DecodeOpts{}) {
+		assert false, 'expected malformed for mixed indef-text chunk'
+	}
+}
+
+fn test_nested_indef_text_rejected() {
+	// 0x7f7f...ff is indef text containing indef text — disallowed.
+	if _ := cbor.decode[string](h('7f7f60ffff'), cbor.DecodeOpts{}) {
+		assert false, 'expected malformed for nested indef text'
+	}
+}
+
+// ---------------------------------------------------------------------
+// UTF-8 validation
+// ---------------------------------------------------------------------
+
+fn test_invalid_utf8_rejected() {
+	// 0x62 = text len 2, then invalid 2-byte sequence 0xc3 0x28.
+	if _ := cbor.decode[string](h('62c328'), cbor.DecodeOpts{}) {
+		assert false, 'expected InvalidUtf8Error'
+	}
+}
+
+fn test_invalid_utf8_can_be_disabled() {
+	// Same input but with validate_utf8 = false succeeds (caller
+	// accepts responsibility for handling raw bytes).
+	got := cbor.decode[string](h('62c328'), cbor.DecodeOpts{ validate_utf8: false }) or {
+		panic('expected success: ${err}')
+	}
+	assert got.len == 2
+}
+
+fn test_invalid_utf8_overlong_rejected() {
+	// "/" = 0x2f, but encoded as 2-byte overlong 0xc0 0xaf — rejected.
+	if _ := cbor.decode[string](h('62c0af'), cbor.DecodeOpts{}) {
+		assert false, 'expected InvalidUtf8Error for overlong'
+	}
+}
+
+fn test_invalid_utf8_surrogate_rejected() {
+	// U+D800 (high surrogate) in 3-byte form: 0xed 0xa0 0x80.
+	if _ := cbor.decode[string](h('63eda080'), cbor.DecodeOpts{}) {
+		assert false, 'expected InvalidUtf8Error for surrogate'
+	}
+}
+
+// ---------------------------------------------------------------------
+// Unknown fields in struct decode
+// ---------------------------------------------------------------------
+
+// Strict is a one-field target used to decode maps that carry an extra key.
+struct Strict {
+	a int
+}
+
+fn test_unknown_field_tolerated_by_default() {
+	// {"a": 1, "b": 2}
+	bytes := h('a26161016162 02'.replace(' ', ''))
+	got := cbor.decode[Strict](bytes, cbor.DecodeOpts{})!
+	assert got.a == 1
+}
+
+fn test_unknown_field_rejected_when_opted_in() {
+	// Same {"a": 1, "b": 2} payload; "b" has no matching field in Strict.
+	bytes := h('a26161016162 02'.replace(' ', ''))
+	if _ := cbor.decode[Strict](bytes, cbor.DecodeOpts{ deny_unknown_fields: true }) {
+		assert false, 'expected UnknownFieldError'
+	}
+}
+
+// ---------------------------------------------------------------------
+// Native tag 0/1 content-type validation (RFC 8949 §3.4.1). Unlike a
+// permissive decoder, we reject tag 0 wrapping non-text and tag 1
+// wrapping non-numbers — same behaviour as QCBOR (the IETF reference).
+// These cases come from the cbor-wg/bad conformance corpus.
+// ---------------------------------------------------------------------
+
+fn test_tag0_wrapping_map_rejected() {
+	// c0 a1 61 61 00 = tag(0, {"a": 0}) — tag 0 must be tstr.
+	if _ := cbor.decode[cbor.Value](h('c0a1616100'), cbor.DecodeOpts{}) {
+		assert false, 'expected tag-0 type rejection'
+	}
+}
+
+fn test_tag1_wrapping_map_rejected() {
+	// c1 a1 61 61 00 = tag(1, {"a": 0}) — tag 1 must be int or float.
+	if _ := cbor.decode[cbor.Value](h('c1a1616100'), cbor.DecodeOpts{}) {
+		assert false, 'expected tag-1 type rejection'
+	}
+}
+
+fn test_tag0_wrapping_text_accepted() {
+	// c0 74 ... = tag(0, "2013-03-21T20:04:00Z") — well-formed.
+	v := cbor.decode[cbor.Value](h('c074323031332d30332d32315432303a30343a30305a'),
+		cbor.DecodeOpts{}) or {
+		assert false, 'tag 0 + text MUST be accepted: ${err}'
+		return
+	}
+	assert v is cbor.Tag
+}
+
+fn test_tag1_wrapping_int_or_float_accepted() {
+	// c1 1a ... = tag(1, 4-byte uint).
+	v := cbor.decode[cbor.Value](h('c11a514b67b0'), cbor.DecodeOpts{}) or {
+		assert false, 'tag 1 + int MUST be accepted: ${err}'
+		return
+	}
+	assert v is cbor.Tag
+
+	// c1 fb ... = tag(1, 8-byte float).
+	v2 := cbor.decode[cbor.Value](h('c1fb41d452d9ec200000'), cbor.DecodeOpts{}) or {
+		assert false, 'tag 1 + float MUST be accepted: ${err}'
+		return
+	}
+	assert v2 is cbor.Tag
+}
+
+// ---------------------------------------------------------------------
+// Header-length overflow: lengths beyond i64::max must be rejected,
+// not silently wrapped to -1 (which would alias the indefinite-length
+// sentinel and steer callers into the wrong loop). See decoder.v
+// `unpack_array_header` / `unpack_map_header`.
+// ---------------------------------------------------------------------
+
+fn test_array_header_oversized_length_rejected() {
+	// 9b ff ff ff ff ff ff ff ff = array, info=27, arg=u64::max.
+	mut u := cbor.new_unpacker(h('9bffffffffffffffff'), cbor.DecodeOpts{})
+	if n := u.unpack_array_header() {
+		assert false, 'expected oversized length rejection, got ${n}'
+	}
+}
+
+fn test_map_header_oversized_length_rejected() {
+	// bb ff ff ff ff ff ff ff ff = map, info=27, arg=u64::max.
+	mut u := cbor.new_unpacker(h('bbffffffffffffffff'), cbor.DecodeOpts{})
+	if n := u.unpack_map_header() {
+		assert false, 'expected oversized length rejection, got ${n}'
+	}
+}
+
+fn test_array_header_at_i64_max_accepted() {
+	// 9b 7f ff ff ff ff ff ff ff = array, info=27, arg=i64::max — boundary.
+	mut u := cbor.new_unpacker(h('9b7fffffffffffffff'), cbor.DecodeOpts{})
+	n := u.unpack_array_header() or {
+		assert false, 'i64::max boundary must succeed: ${err}'
+		return
+	}
+	assert n == max_i64
+}
+
+// ---------------------------------------------------------------------
+// skip_value MUST enforce RFC 8949 §3.2.3 chunk rules for indefinite
+// strings: each chunk must be a definite-length string of the same
+// major type. Otherwise the skip path silently accepts what
+// unpack_text / unpack_bytes correctly reject — letting malformed
+// CBOR through RawMessage / Unmarshaler / unknown-field skipping.
+// ---------------------------------------------------------------------
+
+fn test_skip_value_rejects_cross_type_indef_string_chunk() {
+	// 7f 41 00 ff = indef text containing one bytes chunk (major=2),
+	// then break. unpack_text rejects this; skip_value used to accept.
+	mut u := cbor.new_unpacker(h('7f4100ff'), cbor.DecodeOpts{})
+	if _ := u.skip_value() {
+		assert false, 'skip_value must reject cross-type indef chunk'
+	}
+}
+
+fn test_skip_value_rejects_nested_indef_string_chunk() {
+	// 7f 7f 61 61 ff ff = indef text whose chunk is itself indefinite.
+	mut u := cbor.new_unpacker(h('7f7f6161ffff'), cbor.DecodeOpts{})
+	if _ := u.skip_value() {
+		assert false, 'skip_value must reject nested indef chunk'
+	}
+}
+
+fn test_raw_message_rejects_malformed_indef_string() {
+	// Same payload as the cross-type chunk test (7f 41 00 ff), but exercised
+	// through the RawMessage path (which calls skip_value internally to
+	// compute the slice bounds).
+	mut u := cbor.new_unpacker(h('7f4100ff'), cbor.DecodeOpts{})
+	if _ := u.unpack_raw() {
+		assert false, 'unpack_raw must reject cross-type indef chunk'
+	}
+}
diff --git a/vlib/encoding/cbor/tests/smoke_test.v b/vlib/encoding/cbor/tests/smoke_test.v
new file mode 100644
index 00000000000000..5bdf4fbc2dc5f6
--- /dev/null
+++ b/vlib/encoding/cbor/tests/smoke_test.v
@@ -0,0 +1,8 @@
+module main
+
+import encoding.cbor
+
+// Minimal end-to-end check: u64 zero encodes to the single byte 0x00.
+fn test_smoke_uint() {
+	out := cbor.encode[u64](u64(0), cbor.EncodeOpts{})!
+	assert out == [u8(0x00)]
+}
diff --git a/vlib/encoding/cbor/tests/time_test.v b/vlib/encoding/cbor/tests/time_test.v
new file mode 100644
index 00000000000000..96240e3d67b775
--- /dev/null
+++ b/vlib/encoding/cbor/tests/time_test.v
@@ -0,0 +1,39 @@
+// Tests that `time.Time` auto-tags as RFC 8949 tag 1 (epoch seconds)
+// on encode and accepts both tag 0 (RFC 3339 text) and tag 1 (epoch
+// seconds, integer or float) on decode.
+module main
+
+import encoding.cbor
+import encoding.hex
+import time
+
+// h decodes a hex literal into bytes and panics on malformed hex.
+fn h(s string) []u8 {
+	return hex.decode(s) or { panic('invalid hex: ${s}') }
+}
+
+fn test_time_encode_tag1() {
+	t := time.unix(1363896240)
+	bytes := cbor.encode[time.Time](t, cbor.EncodeOpts{})!
+	// Wire form: c1 (tag 1) + 1a 51 4b 67 b0 (4-byte uint).
+	assert bytes == h('c11a514b67b0'), 'got ${hex.encode(bytes)}'
+}
+
+// Tag 1 with an integer payload decodes to the matching Unix timestamp.
+fn test_time_decode_tag1() {
+	got := cbor.decode[time.Time](h('c11a514b67b0'), cbor.DecodeOpts{})!
+	assert got.unix() == 1363896240
+}
+
+fn test_time_decode_tag0_iso8601() {
+	// Tag 0 + text "2013-03-21T20:04:00Z".
+	got :=
+		cbor.decode[time.Time](h('c074323031332d30332d32315432303a30343a30305a'), cbor.DecodeOpts{})!
+	assert got.unix() == 1363896240
+}
+
+fn test_time_decode_tag1_float() {
+	got := cbor.decode[time.Time](h('c1fb41d452d9ec200000'), cbor.DecodeOpts{})!
+	// 1363896240.5 — half-second offset.
+	assert got.unix() == 1363896240
+	// Sub-second component is non-zero (~500ms).
+	assert got.nanosecond > 0
+}
diff --git a/vlib/encoding/cbor/tests/upstream_appendix_a_test.v b/vlib/encoding/cbor/tests/upstream_appendix_a_test.v
new file mode 100644
index 00000000000000..cf293c15cb23ff
--- /dev/null
+++ b/vlib/encoding/cbor/tests/upstream_appendix_a_test.v
@@ -0,0 +1,348 @@
+// Third-party conformance: drives the entire `appendix_a.json` corpus
+// from https://github.com/cbor/test-vectors (the same file that ciborium,
+// serde_cbor and cbor2 use). Each entry is checked against:
+//
+//   * its `hex` round-trips byte-exact when `roundtrip == true`
+//   * its `decoded` JSON value matches the V-decoded `cbor.Value`
+//   * for entries that only carry a `diagnostic` (NaN, Infinity, undefined,
+//     simple(N), tag(N)(...), h'…'), structural sanity is enforced via
+//     the diagnostic prefix.
+//
+// The fixture file lives next to this test so the corpus is reproducible
+// and offline-buildable.
+module main
+
+import encoding.cbor
+import encoding.hex
+import math
+import os
+import x.json2
+
+// h decodes a hex literal into bytes and panics on malformed hex.
+fn h(s string) []u8 {
+	return hex.decode(s) or { panic('bad hex ${s}') }
+}
+
+// bytes_eq reports whether two byte slices have the same length and content.
+// V's built-in array `==` compares element-wise, so no manual loop is needed.
+fn bytes_eq(a []u8, b []u8) bool {
+	return a == b
+}
+
+// match_decoded compares a V `Value` against a parsed JSON value from the
+// vector's `decoded` field. Returns an error string on mismatch; empty on success.
+fn match_decoded(v cbor.Value, j json2.Any) string { + match j { + i64 { + match v { + cbor.IntNum { + if j >= 0 { + if v.negative || v.magnitude != u64(j) { + return 'int ${j} ↔ IntNum(neg=${v.negative}, mag=${v.magnitude})' + } + } else { + mag := u64(-(j + 1)) + if !v.negative || v.magnitude != mag { + return 'int ${j} ↔ IntNum(neg=${v.negative}, mag=${v.magnitude})' + } + } + return '' + } + cbor.Tag { + // JSON ints beyond ±2^63 land here as i64-clamped or as a string; + // real bignum vectors use the `decoded` field with a u64 / negative, + // so this branch shouldn't fire for that case. + return 'unexpected Tag for plain int ${j}' + } + else { + return 'expected int ${j}, got ${v.type_name()}' + } + } + } + u64 { + if v is cbor.IntNum { + if v.negative || v.magnitude != j { + return 'uint ${j} ↔ IntNum(neg=${v.negative}, mag=${v.magnitude})' + } + return '' + } + return 'expected uint ${j}' + } + f64 { + // JSON has a single number type, so an integer-valued vector + // arrives here as f64 even when the CBOR is major type 0/1. + if v is cbor.FloatNum { + if math.is_nan(j) && math.is_nan(v.value) { + return '' + } + if v.value != j { + return 'float ${j} ↔ ${v.value}' + } + return '' + } + if v is cbor.IntNum { + // Beyond 2^53 JSON's f64 representation loses precision — + // we can't tell IntNum(2^64-1) from IntNum(2^64). Trust the + // roundtrip byte check and accept the structural shape. 
+ f64_exact_int_max := f64(1) * f64(u64(1) << 53) + if math.abs(j) >= f64_exact_int_max { + return '' + } + expected_neg := j < 0 + abs_val := if expected_neg { -j } else { j } + if abs_val != f64(u64(abs_val)) { + return 'float ${j} → IntNum: not integer-valued' + } + if expected_neg { + if !v.negative { + return 'expected negative IntNum for ${j}' + } + if u64(abs_val) - 1 != v.magnitude { + return 'IntNum mag ${v.magnitude} != ${u64(abs_val) - 1}' + } + } else { + if v.negative { + return 'expected non-negative IntNum for ${j}' + } + if u64(abs_val) != v.magnitude { + return 'IntNum mag ${v.magnitude} != ${u64(abs_val)}' + } + } + return '' + } + if v is cbor.Tag { + // Bignum (tag 2/3) representing a value beyond i64. Caller skips. + return 'tag-bignum (caller decides)' + } + return 'expected number ${j}' + } + bool { + if v is cbor.Bool && v.value == j { + return '' + } + return 'expected bool ${j}' + } + string { + if v is cbor.Text && v.value == j { + return '' + } + return 'expected text "${j}"' + } + json2.Null { + if v is cbor.Null { + return '' + } + return 'expected null' + } + []json2.Any { + if v is cbor.Array { + if v.elements.len != j.len { + return 'array length ${v.elements.len} != ${j.len}' + } + for i, item in j { + sub := match_decoded(v.elements[i], item) + if sub != '' { + return 'array[${i}]: ${sub}' + } + } + return '' + } + return 'expected array' + } + map[string]json2.Any { + if v is cbor.Map { + if v.pairs.len != j.len { + return 'map size ${v.pairs.len} != ${j.len}' + } + for pair in v.pairs { + if pair.key !is cbor.Text { + // JSON can only express string keys; mixed-key maps live + // in the diagnostic-only set, so this is safe. 
+ return 'non-text key in JSON-comparable map' + } + tk := pair.key as cbor.Text + if tk.value !in j { + return 'missing key ${tk.value}' + } + jv := j[tk.value] or { return 'missing key ${tk.value}' } + sub := match_decoded(pair.value, jv) + if sub != '' { + return 'map[${tk.value}]: ${sub}' + } + } + return '' + } + return 'expected map' + } + else { + return 'unsupported JSON kind ${typeof(j).name}' + } + } +} + +// match_diagnostic enforces only structural sanity for entries that JSON +// can't directly express (NaN, Infinity, undefined, simple, tag, bignum). +fn match_diagnostic(v cbor.Value, diag string) string { + d := diag.trim_space() + match d { + 'Infinity' { + if v is cbor.FloatNum && math.is_inf(v.value, 1) { + return '' + } + return 'expected +Inf' + } + '-Infinity' { + if v is cbor.FloatNum && math.is_inf(v.value, -1) { + return '' + } + return 'expected -Inf' + } + 'NaN' { + if v is cbor.FloatNum && math.is_nan(v.value) { + return '' + } + return 'expected NaN' + } + 'undefined' { + if v is cbor.Undefined { + return '' + } + return 'expected Undefined' + } + else {} + } + + if d.starts_with('simple(') { + if v is cbor.Simple { + return '' + } + return 'expected Simple' + } + if d.starts_with("h'") { + if v is cbor.Bytes { + return '' + } + return 'expected Bytes' + } + if d.starts_with('(_') { + // Indefinite-length compound. Decoder collapses to definite Value. + if v is cbor.Bytes || v is cbor.Text || v is cbor.Array || v is cbor.Map { + return '' + } + return 'expected indef-collapsed compound' + } + // Tag forms: "0(\"...\")", "1(1363896240)", "23(h'…')", "24(h'…')", "32(\"…\")". + if d.contains('(') && d[0].is_digit() { + if v is cbor.Tag { + return '' + } + return 'expected Tag' + } + // Map literal "{1: 2, 3: 4}" — int-keyed map, can't be expressed in JSON. 
+ if d.starts_with('{') { + if v is cbor.Map { + return '' + } + return 'expected Map' + } + return 'unrecognised diagnostic ${d}' +} + +const fixture_path = os.join_path(os.dir(@FILE), 'appendix_a.json') + +fn test_upstream_appendix_a_corpus() { + raw := os.read_file(fixture_path) or { panic('cannot read fixture: ${err}') } + parsed := json2.decode[json2.Any](raw) or { panic('json: ${err}') } + entries := parsed.as_array() + assert entries.len > 0, 'fixture is empty' + + mut total := 0 + mut roundtrip := 0 + mut value_checks := 0 + mut diag_checks := 0 + mut failures := []string{} + + for entry in entries { + obj := entry.as_map() + total++ + hex_str := obj['hex'] or { json2.Any('') }.str() + // `f818` (simple(24) two-byte form) is well-formed under RFC 7049 + // but RFC 8949 §3.3 explicitly forbids it. The upstream corpus + // predates RFC 8949 — our decoder correctly rejects it. + if hex_str == 'f818' { + cbor.decode[cbor.Value](h(hex_str), cbor.DecodeOpts{}) or { + assert err.msg().contains('1-byte form'), 'unexpected error for f818: ${err}' + continue + } + assert false, 'f818 should be rejected per RFC 8949 §3.3' + } + input := h(hex_str) + rt := if rt_any := obj['roundtrip'] { + rt_any.bool() + } else { + false + } + + decoded_v := cbor.decode[cbor.Value](input, cbor.DecodeOpts{}) or { + failures << 'decode ${hex_str}: ${err}' + continue + } + + // Compare structure where possible. + if decoded := obj['decoded'] { + diff := match_decoded(decoded_v, decoded) + if diff != '' { + // Bignums (decoded JSON ints beyond ±2^64) are represented as + // `Tag(2|3, Bytes)` on the wire. JSON loses them as i64-clamped + // values, so accept Tag/IntNum mismatch when the JSON is at the + // extreme range. 
+ if decoded_v is cbor.Tag { + t := decoded_v as cbor.Tag + if t.number == 2 || t.number == 3 { + value_checks++ + continue + } + } + failures << '${hex_str} decoded: ${diff}' + continue + } + value_checks++ + } else if diag := obj['diagnostic'] { + diff := match_diagnostic(decoded_v, diag.str()) + if diff != '' { + failures << '${hex_str} diagnostic "${diag}": ${diff}' + continue + } + diag_checks++ + } + + // For roundtrip=true entries, re-encode and compare bytes. + if rt { + out := cbor.encode_value(decoded_v, cbor.EncodeOpts{}) + if !bytes_eq(out, input) { + failures << '${hex_str} roundtrip: got ${hex.encode(out)}' + continue + } + roundtrip++ + } + } + + if failures.len > 0 { + for f in failures { + eprintln('FAIL: ${f}') + } + assert false, '${failures.len}/${total} upstream Appendix A vectors failed' + } + + // Sanity: the upstream corpus has 80+ entries; if we ever see less, + // the fixture file is wrong. + assert total >= 80, 'corpus too small: ${total}' + assert roundtrip >= 60, 'too few roundtrip checks: ${roundtrip}' + assert value_checks > 0 + assert diag_checks > 0 +} diff --git a/vlib/encoding/cbor/value.v b/vlib/encoding/cbor/value.v new file mode 100644 index 00000000000000..44d3f1b95f657d --- /dev/null +++ b/vlib/encoding/cbor/value.v @@ -0,0 +1,332 @@ +module cbor + +// Value is the dynamic representation of any CBOR data item. Use it when +// the schema isn't known at compile time, when you need to inspect tags, +// or when keys aren't strings: +// +// v := cbor.decode[cbor.Value](bytes)! +// match v { +// cbor.Text { println(v.value) } +// else { ... } +// } +// +// For known schemas prefer `decode[YourStruct]` — it's faster and avoids +// the heap allocations of building a Value tree. +pub type Value = Array + | Bool + | Bytes + | FloatNum + | IntNum + | Map + | Null + | Simple + | Tag + | Text + | Undefined + +// IntNum holds the full unsigned/negative CBOR integer range. 
// CBOR allows values from -(2^64) to 2^64-1, which exceeds either i64 or u64
// alone, so the sign is kept as a separate flag and the magnitude is carried
// as a u64.
//
//   unsigned: negative=false, magnitude=value
//   negative: negative=true,  magnitude=encoded argument
//             actual integer = -1 - i64(magnitude)   (when it fits i64)
pub struct IntNum {
pub:
	negative  bool
	magnitude u64
}

// Bytes carries the payload of a CBOR byte string (major type 2).
pub struct Bytes {
pub mut:
	data []u8
}

// Text carries a CBOR text string (major type 3, valid UTF-8).
pub struct Text {
pub:
	value string
}

// Array carries the elements of a CBOR array (major type 4).
pub struct Array {
pub mut:
	elements []Value
}

// MapPair is a single key/value entry of a CBOR map. Because CBOR permits
// any data item as a key, entries are kept as an ordered list of pairs
// instead of a V `map[K]V`.
pub struct MapPair {
pub:
	key   Value
	value Value
}

// Map carries the ordered key/value pairs of a CBOR map (major type 5).
pub struct Map {
pub mut:
	pairs []MapPair
}

// Tag is a tagged data item (major type 6). The enclosed item lives in a
// one-element array instead of a `&Value` field: V can box and recurse a
// sumtype through an array, whereas a direct `&Value` field would need
// manual heap allocation. Read the item through `tag.content()`.
pub struct Tag {
pub:
	number      u64
	content_box []Value
}

// content returns the Value wrapped by the Tag; an empty `content_box`
// yields `Null{}`.
@[inline]
pub fn (t &Tag) content() Value {
	if t.content_box.len == 0 {
		return Null{}
	}
	return t.content_box[0]
}

// Bool wraps CBOR true/false (simple values 20/21).
pub struct Bool {
pub:
	value bool
}

// Null wraps CBOR null (simple value 22).
pub struct Null {}

// Undefined wraps CBOR undefined (simple value 23).
pub struct Undefined {}

// FloatBits records which IEEE 754 width the float was originally encoded at.
// The encoder honours this when re-emitting a Value, so round-tripping
// preserves the original byte width.
pub enum FloatBits as u8 {
	@none  = 0
	half   = 16
	single = 32
	double = 64
}

// FloatNum is a CBOR floating-point value (major type 7, additional info
// 25/26/27). `bits` keeps the wire width so re-encoding can reproduce it;
// it defaults to `.double`.
pub struct FloatNum {
pub:
	value f64
	bits  FloatBits = .double
}

// Simple covers the major type 7 simple values 0..255 that are not already
// represented by Bool, Null or Undefined.
pub struct Simple {
pub:
	value u8
}

// new_uint builds the unsigned-integer Value for `n`.
@[inline]
pub fn new_uint(n u64) Value {
	num := IntNum{
		negative:  false
		magnitude: n
	}
	return num
}

// new_int builds an integer Value from a signed i64, selecting the
// unsigned or the negative representation according to the sign of `n`.
@[inline]
pub fn new_int(n i64) Value {
	if n < 0 {
		// Negative integers store -(n + 1); adding 1 before negating keeps
		// the computation in range even for the smallest i64.
		return IntNum{
			negative:  true
			magnitude: u64(-(n + 1))
		}
	}
	return IntNum{
		negative:  false
		magnitude: u64(n)
	}
}

// new_negative builds a negative-integer Value directly from the encoded
// argument of a major-type-1 item; the integer it denotes is
// -1 - magnitude. Handy when the magnitude does not fit in an i64.
@[inline]
pub fn new_negative(magnitude u64) Value {
	num := IntNum{
		negative:  true
		magnitude: magnitude
	}
	return num
}

// new_text builds a CBOR text Value from a string.
@[inline]
pub fn new_text(s string) Value {
	txt := Text{
		value: s
	}
	return txt
}

// new_bytes builds a CBOR byte-string Value from a []u8.
@[inline]
pub fn new_bytes(b []u8) Value {
	payload := Bytes{
		data: b
	}
	return payload
}

// new_float builds a FloatNum Value from an f64 with `bits` set to
// `.@none`, i.e. without a recorded wire width. Such a value re-encodes at
// full precision unless `f64_to_half` / f32 conversion is lossless.
@[inline]
pub fn new_float(v f64) Value {
	num := FloatNum{
		value: v
		bits:  .@none
	}
	return num
}

// new_tag wraps an existing Value with a tag number.
@[inline]
pub fn new_tag(number u64, content Value) Value {
	return Tag{
		number:      number
		content_box: [content]
	}
}

// is_nil returns true if v is the CBOR `null` value.
@[inline]
pub fn (v &Value) is_nil() bool {
	return v is Null
}

// is_undefined returns true if v is the CBOR `undefined` value.
@[inline]
pub fn (v &Value) is_undefined() bool {
	return v is Undefined
}

// as_int returns the value as an i64 when it fits, or none otherwise.
// Returns none for FloatNum, Text, etc.
//
// A negative IntNum denotes the integer -1 - magnitude, so the smallest i64
// (-2^63) corresponds to magnitude 2^63 - 1. A magnitude of 2^63 would mean
// -2^63 - 1, which is out of range and therefore yields none.
pub fn (v &Value) as_int() ?i64 {
	if v is IntNum {
		// The i64 bound on the magnitude is identical for both signs:
		//   non-negative: magnitude      <= 2^63 - 1
		//   negative:     -1 - magnitude >= -2^63  <=>  magnitude <= 2^63 - 1
		if v.magnitude > u64(9223372036854775807) {
			return none
		}
		if v.negative {
			return -1 - i64(v.magnitude)
		}
		return i64(v.magnitude)
	}
	return none
}

// as_uint returns the value as a u64 if it's a non-negative integer, else none.
pub fn (v &Value) as_uint() ?u64 {
	if v is IntNum {
		if v.negative {
			return none
		}
		return v.magnitude
	}
	return none
}

// as_float returns the f64 value, or none if v isn't a FloatNum.
pub fn (v &Value) as_float() ?f64 {
	if v is FloatNum {
		return v.value
	}
	return none
}

// as_bool returns the boolean value, or none if v isn't a Bool.
pub fn (v &Value) as_bool() ?bool {
	if v is Bool {
		return v.value
	}
	return none
}

// as_string returns the text-string value, or none if v isn't Text.
pub fn (v &Value) as_string() ?string {
	if v is Text {
		return v.value
	}
	return none
}

// as_bytes returns the byte-string payload, or none if v isn't Bytes.
pub fn (v &Value) as_bytes() ?[]u8 {
	if v is Bytes {
		return v.data
	}
	return none
}

// as_array returns the elements of an Array, or none.
pub fn (v &Value) as_array() ?[]Value {
	if v is Array {
		return v.elements
	}
	return none
}

// as_map returns the pairs of a Map, or none.
pub fn (v &Value) as_map() ?[]MapPair {
	match v {
		Map { return v.pairs }
		else { return none }
	}
}

// as_tag yields the tag number together with the wrapped content when v is
// a Tag; any other variant yields none.
pub fn (v &Value) as_tag() ?(u64, Value) {
	match v {
		Tag { return v.number, v.content() }
		else { return none }
	}
}

// get looks up `key` among the text-keyed entries of a Map and returns the
// value of the first match. Entries whose key is not Text are skipped, and
// a non-Map receiver yields none. The scan is linear (O(n)); on hot paths
// decode into a typed struct or `map[string]V` instead.
pub fn (v &Value) get(key string) ?Value {
	if v is Map {
		for entry in v.pairs {
			if entry.key is Text && entry.key.value == key {
				return entry.value
			}
		}
	}
	return none
}

// at returns the element stored at `index` of an Array. An out-of-range
// index or a non-Array receiver yields none.
pub fn (v &Value) at(index int) ?Value {
	if v is Array {
		if index < 0 || index >= v.elements.len {
			return none
		}
		return v.elements[index]
	}
	return none
}

// len reports the size of a container-like Value: the element count of an
// Array, the pair count of a Map, `value.len` of a Text and `data.len` of
// Bytes. Every other variant reports 0.
pub fn (v &Value) len() int {
	if v is Array {
		return v.elements.len
	} else if v is Map {
		return v.pairs.len
	} else if v is Text {
		return v.value.len
	} else if v is Bytes {
		return v.data.len
	}
	return 0
}