2929# http://golang.org/src/pkg/utf8/utf8.go
3030module MultiJson
3131 module OkJson
32+ Upstream = '41'
3233 extend self
3334
3435
@@ -50,12 +51,33 @@ def decode(s)
5051 end
5152
5253
# Serializes x as a json text and returns whatever the
# matching encoder returns.
# The value may be built only from Array, Hash, String,
# Numeric, true, false and nil (Symbol is not in that list).
# A Hash root is handed to objenc, an Array root to arrenc,
# and any other root value is passed to valenc.
# An error is raised for values outside that list, such as
# NaN, Infinity, Symbol and Proc, or when a Hash key is not
# a String.
# Strings contained in x must be valid UTF-8.
def encode(x)
  return objenc(x) if Hash === x
  return arrenc(x) if Array === x

  valenc(x)
end
70+
71+
72+ private
73+
74+
5375 # Parses a "json text" in the sense of RFC 4627.
5476 # Returns the parsed value and any trailing tokens.
5577 # Note: this is almost the same as valparse,
5678 # except that it does not accept atomic values.
5779 def textparse ( ts )
58- if ts . length < 0
80+ if ts . length <= 0
5981 raise Error , 'empty'
6082 end
6183
@@ -72,7 +94,7 @@ def textparse(ts)
7294 # Parses a "value" in the sense of RFC 4627.
7395 # Returns the parsed value and any trailing tokens.
7496 def valparse ( ts )
75- if ts . length < 0
97+ if ts . length <= 0
7698 raise Error , 'empty'
7799 end
78100
@@ -201,21 +223,19 @@ def lex(s)
201223 # it is the lexeme.
def tok(s)
  # The first element of s decides which kind of token follows.
  lead = s[0]

  # Structural characters: the type is the character itself and
  # both the lexeme and the value are the one-element prefix of s.
  if    ?{ == lead then ['{', s[0, 1], s[0, 1]]
  elsif ?} == lead then ['}', s[0, 1], s[0, 1]]
  elsif ?: == lead then [':', s[0, 1], s[0, 1]]
  elsif ?, == lead then [',', s[0, 1], s[0, 1]]
  elsif ?[ == lead then ['[', s[0, 1], s[0, 1]]
  elsif ?] == lead then [']', s[0, 1], s[0, 1]]
  # Literals and strings are recognised by their dedicated scanners.
  elsif ?n == lead then nulltok(s)
  elsif ?t == lead then truetok(s)
  elsif ?f == lead then falsetok(s)
  elsif ?" == lead then strtok(s)
  # Space, tab, newline and carriage return all produce a :space token.
  elsif Spc == lead || ?\t == lead || ?\n == lead || ?\r == lead
    [:space, s[0, 1], s[0, 1]]
  else
    # Anything else is tried as a number.
    numtok(s)
  end
end
221241
@@ -228,12 +248,12 @@ def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end
228248 def numtok ( s )
229249 m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/ . match ( s )
230250 if m && m . begin ( 0 ) == 0
231- if m [ 3 ] && !m [ 2 ]
232- [ :val , m [ 0 ] , Integer ( m [ 1 ] ) * ( 10 ** Integer ( m [ 3 ] [ 1 ..- 1 ] ) ) ]
251+ if ! m [ 2 ] && !m [ 3 ]
252+ [ :val , m [ 0 ] , Integer ( m [ 0 ] ) ]
233253 elsif m [ 2 ]
234254 [ :val , m [ 0 ] , Float ( m [ 0 ] ) ]
235255 else
236- [ :val , m [ 0 ] , Integer ( m [ 0 ] ) ]
256+ [ :val , m [ 0 ] , Integer ( m [ 1 ] ) * ( 10 ** Integer ( m [ 3 ] [ 1 ..- 1 ] ) ) ]
237257 end
238258 else
239259 [ ]
@@ -265,17 +285,14 @@ def abbrev(s)
265285 def unquote ( q )
266286 q = q [ 1 ...-1 ]
267287 a = q . dup # allocate a big enough string
268- rubydoesenc = false
269288 # In ruby >= 1.9, a[w] is a codepoint, not a byte.
270- if a . class . method_defined? ( :force_encoding )
289+ if rubydoesenc?
271290 a . force_encoding ( 'UTF-8' )
272- rubydoesenc = true
273291 end
274292 r , w = 0 , 0
275293 while r < q . length
276294 c = q [ r ]
277- case true
278- when c == ?\\
295+ if c == ?\\
279296 r += 1
280297 if r >= q . length
281298 raise Error , "string literal ends with a \" \\ \" : \" #{ q } \" "
@@ -308,7 +325,7 @@ def unquote(q)
308325 end
309326 end
310327 end
311- if rubydoesenc
328+ if rubydoesenc?
312329 a [ w ] = '' << uchar
313330 w += 1
314331 else
@@ -317,7 +334,7 @@ def unquote(q)
317334 else
318335 raise Error , "invalid escape char #{ q [ r ] } in \" #{ q } \" "
319336 end
320- when c == ?", c < Spc
337+ elsif c == ?" || c < Spc
321338 raise Error , "invalid character in string literal \" #{ q } \" "
322339 else
323340 # Copy anything else byte-for-byte.
@@ -338,15 +355,14 @@ def unquote(q)
338355 # bytes in string a at position i.
339356 # Returns the number of bytes written.
340357 def ucharenc ( a , i , u )
341- case true
342- when u <= Uchar1max
358+ if u <= Uchar1max
343359 a [ i ] = ( u & 0xff ) . chr
344360 1
345- when u <= Uchar2max
361+ elsif u <= Uchar2max
346362 a [ i +0 ] = ( Utag2 | ( ( u >>6 ) &0xff ) ) . chr
347363 a [ i +1 ] = ( Utagx | ( u &Umaskx ) ) . chr
348364 2
349- when u <= Uchar3max
365+ elsif u <= Uchar3max
350366 a [ i +0 ] = ( Utag3 | ( ( u >>12 ) &0xff ) ) . chr
351367 a [ i +1 ] = ( Utagx | ( ( u >>6 ) &Umaskx ) ) . chr
352368 a [ i +2 ] = ( Utagx | ( u &Umaskx ) ) . chr
@@ -383,35 +399,15 @@ def surrogate?(u)
383399
384400
# Converts a single hexadecimal digit to its numeric value.
#
# c is the digit to convert; it is compared against character
# literals and must respond to ord.
#
# Returns an integer from 0 to 15: '0'..'9' map to 0..9, and
# 'a'..'f' / 'A'..'F' map to 10..15.
#
# Raises Error for anything else. Only a-f and A-F are accepted as
# letters; a wider range such as a-z would silently turn non-hex
# letters (for example 'g') into values of 16 or more.
def nibble(c)
  if    ?0 <= c && c <= ?9 then c.ord - ?0.ord
  elsif ?a <= c && c <= ?f then c.ord - ?a.ord + 10
  elsif ?A <= c && c <= ?F then c.ord - ?A.ord + 10
  else
    raise Error, "invalid hex code #{ c } "
  end
end
394409
395410
396- # Encodes x into a json text. It may contain only
397- # Array, Hash, String, Numeric, true, false, nil.
398- # (Note, this list excludes Symbol.)
399- # X itself must be an Array or a Hash.
400- # No other value can be encoded, and an error will
401- # be raised if x contains any other value, such as
402- # Nan, Infinity, Symbol, and Proc, or if a Hash key
403- # is not a String.
404- # Strings contained in x must be valid UTF-8.
405- def encode ( x )
406- case x
407- when Hash then objenc ( x )
408- when Array then arrenc ( x )
409- else
410- raise Error , 'root value must be an Array or a Hash'
411- end
412- end
413-
414-
415411 def valenc ( x )
416412 case x
417413 when Hash then objenc ( x )
@@ -455,9 +451,6 @@ def strenc(s)
455451 t . putc ( ?")
456452 r = 0
457453
458- # In ruby >= 1.9, s[r] is a codepoint, not a byte.
459- rubydoesenc = s . class . method_defined? ( :encoding )
460-
461454 while r < s . length
462455 case s [ r ]
463456 when ?" then t . print ( '\\"' )
@@ -469,15 +462,15 @@ def strenc(s)
469462 when ?\t then t . print ( '\\t' )
470463 else
471464 c = s [ r ]
472- case true
473- when rubydoesenc
465+ # In ruby >= 1.9, s[r] is a codepoint, not a byte.
466+ if rubydoesenc?
474467 begin
475468 c . ord # will raise an error if c is invalid UTF-8
476469 t . write ( c )
477470 rescue
478471 t . write ( Ustrerr )
479472 end
480- when Spc <= c && c <= ?~
473+ elsif Spc <= c && c <= ?~
481474 t . putc ( c )
482475 else
483476 n = ucharcopy ( t , s , r ) # ensure valid UTF-8 output
@@ -569,6 +562,11 @@ def ucharcopy(t, s, i)
569562 end
570563
571564
# Reports whether ::String defines force_encoding.
# Callers in this file use the answer to choose between
# character-based and byte-based string handling.
def rubydoesenc?
  encoding_aware = ::String.method_defined?(:force_encoding)
  encoding_aware
end
568+
569+
# Error type derived from ::StandardError.
# NOTE(review): presumably raised for invalid UTF-8 input; the raising
# code is not visible in this part of the file.
class Utf8Error < ::StandardError; end
574572
@@ -577,15 +575,15 @@ class Error < ::StandardError
577575 end
578576
579577
# Tag bits OR-ed into the bytes of an encoded character
# (see their use in ucharenc).
Utagx  = 0b1000_0000 # 0x80
Utag2  = 0b1100_0000 # 0xc0
Utag3  = 0b1110_0000 # 0xe0
Utag4  = 0b1111_0000 # 0xf0
Utag5  = 0b1111_1000 # 0xf8

# Masks selecting the payload bits of a byte.
Umaskx = 0b0011_1111 # 0x3f
Umask2 = 0b0001_1111 # 0x1f
Umask3 = 0b0000_1111 # 0x0f
Umask4 = 0b0000_0111 # 0x07

# Largest code point that ucharenc writes as 1, 2 and 3 bytes.
Uchar1max = 0x7f   # (1 << 7) - 1
Uchar2max = 0x7ff  # (1 << 11) - 1
Uchar3max = 0xffff # (1 << 16) - 1
@@ -599,4 +597,4 @@ class Error < ::StandardError
# The space character, in whatever form indexing a string yields.
Spc = ' '[0]

# Maps the letter of a single-character escape to the control
# character it stands for.
Unesc = {
  ?b => ?\b,
  ?f => ?\f,
  ?n => ?\n,
  ?r => ?\r,
  ?t => ?\t
}
601599 end
602- end
600+ end
0 commit comments