Skip to content

Commit b50f308

Browse files
authored
Enable optional skipping of null bytes on dump (#883)
* Introduce skip_null_byte option
* Skip null bytes when dumping strings if flag is set
* Describe skip_null_byte flag in docs
* Move null byte check inside non-printable chars check
* Fix clang-format violations
1 parent 3d1316e commit b50f308

8 files changed

Lines changed: 111 additions & 37 deletions

File tree

docs/mode_table.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
<tr><td>:quirks_mode</td><td>Boolean</td><td></td><td></td><td>6</td><td></td><td></td><td>x</td><td></td></tr>
3737
<tr><td>:safe</td><td>String</td><td></td><td></td><td>x</td><td></td><td></td><td></td><td></td></tr>
3838
<tr><td>:second_precision</td><td>Fixnum</td><td></td><td></td><td></td><td></td><td>x</td><td>x</td><td></td></tr>
39+
<tr><td>:skip_null_byte</td><td>Boolean</td><td>x</td><td>x</td><td>x</td><td>x</td><td>x</td><td>x</td><td></td></tr>
3940
<tr><td>:space</td><td>String</td><td></td><td></td><td>x</td><td>x</td><td></td><td>x</td><td></td></tr>
4041
<tr><td>:space_before</td><td>String</td><td></td><td></td><td>x</td><td>x</td><td></td><td>x</td><td></td></tr>
4142
<tr><td>:symbol_keys</td><td>Boolean</td><td>x</td><td>x</td><td>x</td><td>x</td><td>x</td><td>x</td><td></td></tr>

ext/oj/dump.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,9 @@ void oj_dump_cstr(const char *str, size_t cnt, bool is_sym, bool escape1, Out ou
857857
break;
858858
case '6': // control characters
859859
if (*(uint8_t *)str < 0x80) {
860+
if (0 == (uint8_t)*str && Yes == out->opts->skip_null_byte) {
861+
break;
862+
}
860863
APPEND_CHARS(out->cur, "\\u00", 4);
861864
dump_hex((uint8_t)*str, out);
862865
} else {

ext/oj/mimic_json.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,7 @@ static struct _options mimic_object_to_json_options = {0, // indent
723723
"%0.16g", // float_fmt
724724
Qnil, // hash_class
725725
Qnil, // array_class
726+
No, // skip_null_byte
726727
{
727728
// dump_opts
728729
false, // use

ext/oj/oj.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ VALUE oj_nanosecond_sym;
9797
VALUE oj_object_class_sym;
9898
VALUE oj_quirks_mode_sym;
9999
VALUE oj_safe_sym;
100+
VALUE oj_skip_null_byte_sym;
100101
VALUE oj_symbolize_names_sym;
101102
VALUE oj_trace_sym;
102103

@@ -207,6 +208,7 @@ struct _options oj_default_options = {
207208
"%0.15g", // float_fmt
208209
Qnil, // hash_class
209210
Qnil, // array_class
211+
No, // skip_null_byte
210212
{
211213
// dump_opts
212214
false, // use
@@ -299,6 +301,7 @@ struct _options oj_default_options = {
299301
* - *:integer_range* [_Range_] Dump integers outside range as strings.
300302
* - *:trace* [_true,_|_false_] Trace all load and dump calls, default is false (trace is off)
301303
* - *:safe* [_true,_|_false_] Safe mimic breaks JSON mimic to be safer, default is false (safe is
302304
*off)
305+
* - *:skip_null_byte* [_true_|_false_] if true null bytes in strings will be omitted when dumping
303306
*
304307
* Return [_Hash_] all current option settings.
@@ -384,6 +387,12 @@ static VALUE get_def_opts(VALUE self) {
384387
opts,
385388
cache_keys_sym,
386389
(Yes == oj_default_options.cache_keys) ? Qtrue : ((No == oj_default_options.cache_keys) ? Qfalse : Qnil));
390+
rb_hash_aset(opts,
391+
oj_skip_null_byte_sym,
392+
(Yes == oj_default_options.skip_null_byte)
393+
? Qtrue
394+
: ((No == oj_default_options.skip_null_byte) ? Qfalse : Qnil));
395+
387396
switch (oj_default_options.mode) {
388397
case StrictMode: rb_hash_aset(opts, mode_sym, strict_sym); break;
389398
case CompatMode: rb_hash_aset(opts, mode_sym, compat_sym); break;
@@ -585,6 +594,7 @@ bool set_yesno_options(VALUE key, VALUE value, Options copts) {
585594
{ignore_under_sym, &copts->ignore_under},
586595
{oj_create_additions_sym, &copts->create_ok},
587596
{cache_keys_sym, &copts->cache_keys},
597+
{oj_skip_null_byte_sym, &copts->skip_null_byte},
588598
{Qnil, 0}};
589599
YesNoOpt o;
590600

@@ -1967,6 +1977,8 @@ void Init_oj(void) {
19671977
rb_gc_register_address(&oj_quirks_mode_sym);
19681978
oj_safe_sym = ID2SYM(rb_intern("safe"));
19691979
rb_gc_register_address(&oj_safe_sym);
1980+
oj_skip_null_byte_sym = ID2SYM(rb_intern("skip_null_byte"));
1981+
rb_gc_register_address(&oj_skip_null_byte_sym);
19701982
oj_space_before_sym = ID2SYM(rb_intern("space_before"));
19711983
rb_gc_register_address(&oj_space_before_sym);
19721984
oj_space_sym = ID2SYM(rb_intern("space"));

ext/oj/oj.h

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -128,43 +128,44 @@ typedef struct _dumpOpts {
128128
} *DumpOpts;
129129

130130
typedef struct _options {
131-
int indent; // indention for dump, default 2
132-
char circular; // YesNo
133-
char auto_define; // YesNo
134-
char sym_key; // YesNo
135-
char escape_mode; // Escape_Mode
136-
char mode; // Mode
137-
char class_cache; // YesNo
138-
char time_format; // TimeFormat
139-
char bigdec_as_num; // YesNo
140-
char bigdec_load; // BigLoad
141-
char compat_bigdec; // boolean (0 or 1)
142-
char to_hash; // YesNo
143-
char to_json; // YesNo
144-
char as_json; // YesNo
145-
char raw_json; // YesNo
146-
char nilnil; // YesNo
147-
char empty_string; // YesNo
148-
char allow_gc; // allow GC during parse
149-
char quirks_mode; // allow single JSON values instead of documents
150-
char allow_invalid; // YesNo - allow invalid unicode
151-
char create_ok; // YesNo allow create_id
152-
char allow_nan; // YEsyNo for parsing only
153-
char trace; // YesNo
154-
char safe; // YesNo
155-
char sec_prec_set; // boolean (0 or 1)
156-
char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
157-
char cache_keys; // YesNo
158-
char cache_str; // string short than or equal to this are cache
159-
int64_t int_range_min; // dump numbers below as string
160-
int64_t int_range_max; // dump numbers above as string
161-
const char *create_id; // 0 or string
162-
size_t create_id_len; // length of create_id
163-
int sec_prec; // second precision when dumping time
164-
char float_prec; // float precision, linked to float_fmt
165-
char float_fmt[7]; // float format for dumping, if empty use Ruby
166-
VALUE hash_class; // class to use in place of Hash on load
167-
VALUE array_class; // class to use in place of Array on load
131+
int indent; // indention for dump, default 2
132+
char circular; // YesNo
133+
char auto_define; // YesNo
134+
char sym_key; // YesNo
135+
char escape_mode; // Escape_Mode
136+
char mode; // Mode
137+
char class_cache; // YesNo
138+
char time_format; // TimeFormat
139+
char bigdec_as_num; // YesNo
140+
char bigdec_load; // BigLoad
141+
char compat_bigdec; // boolean (0 or 1)
142+
char to_hash; // YesNo
143+
char to_json; // YesNo
144+
char as_json; // YesNo
145+
char raw_json; // YesNo
146+
char nilnil; // YesNo
147+
char empty_string; // YesNo
148+
char allow_gc; // allow GC during parse
149+
char quirks_mode; // allow single JSON values instead of documents
150+
char allow_invalid; // YesNo - allow invalid unicode
151+
char create_ok; // YesNo allow create_id
152+
char allow_nan; // YesNo for parsing only
153+
char trace; // YesNo
154+
char safe; // YesNo
155+
char sec_prec_set; // boolean (0 or 1)
156+
char ignore_under; // YesNo - ignore attrs starting with _ if true in object and custom modes
157+
char cache_keys; // YesNo
158+
char cache_str; // strings shorter than or equal to this are cached
159+
int64_t int_range_min; // dump numbers below as string
160+
int64_t int_range_max; // dump numbers above as string
161+
const char *create_id; // 0 or string
162+
size_t create_id_len; // length of create_id
163+
int sec_prec; // second precision when dumping time
164+
char float_prec; // float precision, linked to float_fmt
165+
char float_fmt[7]; // float format for dumping, if empty use Ruby
166+
VALUE hash_class; // class to use in place of Hash on load
167+
VALUE array_class; // class to use in place of Array on load
168+
char skip_null_byte; // YesNo
168169
struct _dumpOpts dump_opts;
169170
struct _rxClass str_rx;
170171
VALUE *ignore; // Qnil terminated array of classes or NULL
@@ -323,6 +324,7 @@ extern VALUE oj_max_nesting_sym;
323324
extern VALUE oj_object_class_sym;
324325
extern VALUE oj_object_nl_sym;
325326
extern VALUE oj_quirks_mode_sym;
327+
extern VALUE oj_skip_null_byte_sym;
326328
extern VALUE oj_space_before_sym;
327329
extern VALUE oj_space_sym;
328330
extern VALUE oj_symbolize_names_sym;

pages/Options.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ to true.
265265

266266
The number of digits after the decimal when dumping the seconds of time.
267267

268+
### :skip_null_byte [Boolean]
269+
270+
If true, null bytes in strings will be omitted when dumping.
271+
268272
### :space
269273

270274
String inserted after the ':' character when dumping a JSON object. The

test/test_custom.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,11 @@ def test_omit_nil
393393
assert_equal(%|{"x":{"a":1}}|, json)
394394
end
395395

396+
def test_skip_null_byte
397+
json = Oj.dump({ "fo\x00o" => "b\x00ar" }, :skip_null_byte => true)
398+
assert_equal(%|{"foo":"bar"}|, json)
399+
end
400+
396401
def test_complex
397402
obj = Complex(2, 9)
398403
dump_and_load(obj, false, :create_id => "^o", :create_additions => true)

test/test_various.rb

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ def test_set_options
139139
ignore_under: true,
140140
trace: true,
141141
safe: true,
142+
skip_null_byte: false
142143
}
143144
Oj.default_options = alt
144145
#keys = alt.keys
@@ -734,6 +735,51 @@ def test_omit_nil
734735
assert_equal(%|{"x":{"a":1}}|, json)
735736
end
736737

738+
def test_skip_null_byte
739+
Oj.default_options = { :skip_null_byte => true }
740+
741+
json = Oj.dump({ "fo\x00o" => "b\x00ar" })
742+
assert_equal(%|{"foo":"bar"}|, json)
743+
744+
json = Oj.dump({ "foo\x00" => "\x00bar" })
745+
assert_equal(%|{"foo":"bar"}|, json)
746+
747+
json = Oj.dump({ "\x00foo" => "bar\x00" })
748+
assert_equal(%|{"foo":"bar"}|, json)
749+
750+
json = Oj.dump({ "fo\0o" => "ba\0r" })
751+
assert_equal(%|{"foo":"bar"}|, json)
752+
753+
json = Oj.dump({ "foo\0" => "\0bar" })
754+
assert_equal(%|{"foo":"bar"}|, json)
755+
756+
json = Oj.dump({ "\0foo" => "bar\0" })
757+
assert_equal(%|{"foo":"bar"}|, json)
758+
759+
json = Oj.dump({ "fo\u0000o" => "ba\u0000r" })
760+
assert_equal(%|{"foo":"bar"}|, json)
761+
762+
json = Oj.dump({ "foo\u0000" => "\u0000bar" })
763+
assert_equal(%|{"foo":"bar"}|, json)
764+
765+
json = Oj.dump({ "\u0000foo" => "bar\u0000" })
766+
assert_equal(%|{"foo":"bar"}|, json)
767+
768+
json = Oj.dump({ "\x00foo" => "bar\x00" }, :skip_null_byte => false)
769+
assert_equal(%|{"\\u0000foo":"bar\\u0000"}|, json)
770+
771+
json = Oj.dump({ "\x00foo" => "bar\x00" }, :skip_null_byte => nil)
772+
assert_equal(%|{"\\u0000foo":"bar\\u0000"}|, json)
773+
774+
Oj.default_options = { :skip_null_byte => false }
775+
776+
json = Oj.dump({ "\x00foo" => "bar\x00" })
777+
assert_equal(%|{"\\u0000foo":"bar\\u0000"}|, json)
778+
779+
json = Oj.dump({ "\x00foo" => "bar\x00" }, :skip_null_byte => true)
780+
assert_equal(%|{"foo":"bar"}|, json)
781+
end
782+
737783
def dump_and_load(obj, trace=false)
738784
json = Oj.dump(obj, :indent => 2)
739785
puts json if trace

0 commit comments

Comments
 (0)