@@ -37,7 +37,7 @@ escaped_length(VALUE str)
3737{
3838 const long len = RSTRING_LEN (str );
3939 if (len >= LONG_MAX / HTML_ESCAPE_MAX_LEN ) {
40- ruby_malloc_size_overflow (len , HTML_ESCAPE_MAX_LEN );
40+ ruby_malloc_size_overflow (len , HTML_ESCAPE_MAX_LEN );
4141 }
4242 return len * HTML_ESCAPE_MAX_LEN ;
4343}
@@ -81,8 +81,8 @@ optimized_unescape_html(VALUE str)
8181 enum {UNICODE_MAX = 0x10ffff };
8282 rb_encoding * enc = rb_enc_get (str );
8383 unsigned long charlimit = (strcasecmp (rb_enc_name (enc ), "UTF-8" ) == 0 ? UNICODE_MAX :
84- strcasecmp (rb_enc_name (enc ), "ISO-8859-1" ) == 0 ? 256 :
85- 128 );
84+ strcasecmp (rb_enc_name (enc ), "ISO-8859-1" ) == 0 ? 256 :
85+ 128 );
8686 long i , len , beg = 0 ;
8787 size_t clen , plen ;
8888 int overflow ;
@@ -94,89 +94,89 @@ optimized_unescape_html(VALUE str)
9494 cstr = RSTRING_PTR (str );
9595
9696 for (i = 0 ; i < len ; i ++ ) {
97- unsigned long cc ;
98- char c = cstr [i ];
99- if (c != '&' ) continue ;
100- plen = i - beg ;
101- if (++ i >= len ) break ;
102- c = (unsigned char )cstr [i ];
97+ unsigned long cc ;
98+ char c = cstr [i ];
99+ if (c != '&' ) continue ;
100+ plen = i - beg ;
101+ if (++ i >= len ) break ;
102+ c = (unsigned char )cstr [i ];
103103#define MATCH (s ) (len - i >= (int)rb_strlen_lit(s) && \
104- memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \
105- (i += rb_strlen_lit(s) - 1, 1))
106- switch (c ) {
107- case 'a' :
108- ++ i ;
109- if (MATCH ("pos;" )) {
110- c = '\'' ;
111- }
112- else if (MATCH ("mp;" )) {
113- c = '&' ;
114- }
115- else continue ;
116- break ;
117- case 'q' :
118- ++ i ;
119- if (MATCH ("uot;" )) {
120- c = '"' ;
121- }
122- else continue ;
123- break ;
124- case 'g' :
125- ++ i ;
126- if (MATCH ("t;" )) {
127- c = '>' ;
128- }
129- else continue ;
130- break ;
131- case 'l' :
132- ++ i ;
133- if (MATCH ("t;" )) {
134- c = '<' ;
135- }
136- else continue ;
137- break ;
138- case '#' :
139- if (len - ++ i >= 2 && ISDIGIT (cstr [i ])) {
140- cc = ruby_scan_digits (& cstr [i ], len - i , 10 , & clen , & overflow );
141- }
142- else if ((cstr [i ] == 'x' || cstr [i ] == 'X' ) && len - ++ i >= 2 && ISXDIGIT (cstr [i ])) {
143- cc = ruby_scan_digits (& cstr [i ], len - i , 16 , & clen , & overflow );
144- }
145- else continue ;
146- i += clen ;
147- if (overflow || cc >= charlimit || cstr [i ] != ';' ) continue ;
148- if (!dest ) {
149- dest = rb_str_buf_new (len );
150- }
151- rb_str_cat (dest , cstr + beg , plen );
152- if (charlimit > 256 ) {
153- rb_str_cat (dest , buf , rb_enc_mbcput ((OnigCodePoint )cc , buf , enc ));
154- }
155- else {
156- c = (unsigned char )cc ;
157- rb_str_cat (dest , & c , 1 );
158- }
159- beg = i + 1 ;
160- continue ;
161- default :
162- -- i ;
163- continue ;
164- }
165- if (!dest ) {
166- dest = rb_str_buf_new (len );
167- }
168- rb_str_cat (dest , cstr + beg , plen );
169- rb_str_cat (dest , & c , 1 );
170- beg = i + 1 ;
104+ memcmp(&cstr[i], s, rb_strlen_lit(s)) == 0 && \
105+ (i += rb_strlen_lit(s) - 1, 1))
106+ switch (c ) {
107+ case 'a' :
108+ ++ i ;
109+ if (MATCH ("pos;" )) {
110+ c = '\'' ;
111+ }
112+ else if (MATCH ("mp;" )) {
113+ c = '&' ;
114+ }
115+ else continue ;
116+ break ;
117+ case 'q' :
118+ ++ i ;
119+ if (MATCH ("uot;" )) {
120+ c = '"' ;
121+ }
122+ else continue ;
123+ break ;
124+ case 'g' :
125+ ++ i ;
126+ if (MATCH ("t;" )) {
127+ c = '>' ;
128+ }
129+ else continue ;
130+ break ;
131+ case 'l' :
132+ ++ i ;
133+ if (MATCH ("t;" )) {
134+ c = '<' ;
135+ }
136+ else continue ;
137+ break ;
138+ case '#' :
139+ if (len - ++ i >= 2 && ISDIGIT (cstr [i ])) {
140+ cc = ruby_scan_digits (& cstr [i ], len - i , 10 , & clen , & overflow );
141+ }
142+ else if ((cstr [i ] == 'x' || cstr [i ] == 'X' ) && len - ++ i >= 2 && ISXDIGIT (cstr [i ])) {
143+ cc = ruby_scan_digits (& cstr [i ], len - i , 16 , & clen , & overflow );
144+ }
145+ else continue ;
146+ i += clen ;
147+ if (overflow || cc >= charlimit || cstr [i ] != ';' ) continue ;
148+ if (!dest ) {
149+ dest = rb_str_buf_new (len );
150+ }
151+ rb_str_cat (dest , cstr + beg , plen );
152+ if (charlimit > 256 ) {
153+ rb_str_cat (dest , buf , rb_enc_mbcput ((OnigCodePoint )cc , buf , enc ));
154+ }
155+ else {
156+ c = (unsigned char )cc ;
157+ rb_str_cat (dest , & c , 1 );
158+ }
159+ beg = i + 1 ;
160+ continue ;
161+ default :
162+ -- i ;
163+ continue ;
164+ }
165+ if (!dest ) {
166+ dest = rb_str_buf_new (len );
167+ }
168+ rb_str_cat (dest , cstr + beg , plen );
169+ rb_str_cat (dest , & c , 1 );
170+ beg = i + 1 ;
171171 }
172172
173173 if (dest ) {
174- rb_str_cat (dest , cstr + beg , len - beg );
175- preserve_original_state (str , dest );
176- return dest ;
174+ rb_str_cat (dest , cstr + beg , len - beg );
175+ preserve_original_state (str , dest );
176+ return dest ;
177177 }
178178 else {
179- return rb_str_dup (str );
179+ return rb_str_dup (str );
180180 }
181181}
182182
@@ -211,33 +211,33 @@ optimized_escape(VALUE str)
211211 cstr = RSTRING_PTR (str );
212212
213213 for (i = 0 ; i < len ; ++ i ) {
214- const unsigned char c = (unsigned char )cstr [i ];
215- if (!url_unreserved_char (c )) {
216- if (!dest ) {
217- dest = rb_str_buf_new (len );
218- }
219-
220- rb_str_cat (dest , cstr + beg , i - beg );
221- beg = i + 1 ;
222-
223- if (c == ' ' ) {
224- rb_str_cat_cstr (dest , "+" );
225- }
226- else {
227- buf [1 ] = upper_hexdigits [(c >> 4 ) & 0xf ];
228- buf [2 ] = upper_hexdigits [c & 0xf ];
229- rb_str_cat (dest , buf , 3 );
230- }
231- }
214+ const unsigned char c = (unsigned char )cstr [i ];
215+ if (!url_unreserved_char (c )) {
216+ if (!dest ) {
217+ dest = rb_str_buf_new (len );
218+ }
219+
220+ rb_str_cat (dest , cstr + beg , i - beg );
221+ beg = i + 1 ;
222+
223+ if (c == ' ' ) {
224+ rb_str_cat_cstr (dest , "+" );
225+ }
226+ else {
227+ buf [1 ] = upper_hexdigits [(c >> 4 ) & 0xf ];
228+ buf [2 ] = upper_hexdigits [c & 0xf ];
229+ rb_str_cat (dest , buf , 3 );
230+ }
231+ }
232232 }
233233
234234 if (dest ) {
235- rb_str_cat (dest , cstr + beg , len - beg );
236- preserve_original_state (str , dest );
237- return dest ;
235+ rb_str_cat (dest , cstr + beg , len - beg );
236+ preserve_original_state (str , dest );
237+ return dest ;
238238 }
239239 else {
240- return rb_str_dup (str );
240+ return rb_str_dup (str );
241241 }
242242}
243243
@@ -254,52 +254,52 @@ optimized_unescape(VALUE str, VALUE encoding)
254254 cstr = RSTRING_PTR (str );
255255
256256 for (i = 0 ; i < len ; ++ i ) {
257- char buf [1 ];
258- const char c = cstr [i ];
259- int clen = 0 ;
260- if (c == '%' ) {
261- if (i + 3 > len ) break ;
262- if (!ISXDIGIT (cstr [i + 1 ])) continue ;
263- if (!ISXDIGIT (cstr [i + 2 ])) continue ;
264- buf [0 ] = ((char_to_number (cstr [i + 1 ]) << 4 )
265- | char_to_number (cstr [i + 2 ]));
266- clen = 2 ;
267- }
268- else if (c == '+' ) {
269- buf [0 ] = ' ' ;
270- }
271- else {
272- continue ;
273- }
274-
275- if (!dest ) {
276- dest = rb_str_buf_new (len );
277- }
278-
279- rb_str_cat (dest , cstr + beg , i - beg );
280- i += clen ;
281- beg = i + 1 ;
282-
283- rb_str_cat (dest , buf , 1 );
257+ char buf [1 ];
258+ const char c = cstr [i ];
259+ int clen = 0 ;
260+ if (c == '%' ) {
261+ if (i + 3 > len ) break ;
262+ if (!ISXDIGIT (cstr [i + 1 ])) continue ;
263+ if (!ISXDIGIT (cstr [i + 2 ])) continue ;
264+ buf [0 ] = ((char_to_number (cstr [i + 1 ]) << 4 )
265+ | char_to_number (cstr [i + 2 ]));
266+ clen = 2 ;
267+ }
268+ else if (c == '+' ) {
269+ buf [0 ] = ' ' ;
270+ }
271+ else {
272+ continue ;
273+ }
274+
275+ if (!dest ) {
276+ dest = rb_str_buf_new (len );
277+ }
278+
279+ rb_str_cat (dest , cstr + beg , i - beg );
280+ i += clen ;
281+ beg = i + 1 ;
282+
283+ rb_str_cat (dest , buf , 1 );
284284 }
285285
286286 if (dest ) {
287- rb_str_cat (dest , cstr + beg , len - beg );
288- preserve_original_state (str , dest );
289- cr = ENC_CODERANGE_UNKNOWN ;
287+ rb_str_cat (dest , cstr + beg , len - beg );
288+ preserve_original_state (str , dest );
289+ cr = ENC_CODERANGE_UNKNOWN ;
290290 }
291291 else {
292- dest = rb_str_dup (str );
293- cr = ENC_CODERANGE (str );
292+ dest = rb_str_dup (str );
293+ cr = ENC_CODERANGE (str );
294294 }
295295 origenc = rb_enc_get_index (str );
296296 if (origenc != encidx ) {
297- rb_enc_associate_index (dest , encidx );
298- if (!ENC_CODERANGE_CLEAN_P (rb_enc_str_coderange (dest ))) {
299- rb_enc_associate_index (dest , origenc );
300- if (cr != ENC_CODERANGE_UNKNOWN )
301- ENC_CODERANGE_SET (dest , cr );
302- }
297+ rb_enc_associate_index (dest , encidx );
298+ if (!ENC_CODERANGE_CLEAN_P (rb_enc_str_coderange (dest ))) {
299+ rb_enc_associate_index (dest , origenc );
300+ if (cr != ENC_CODERANGE_UNKNOWN )
301+ ENC_CODERANGE_SET (dest , cr );
302+ }
303303 }
304304 return dest ;
305305}
@@ -317,10 +317,10 @@ cgiesc_escape_html(VALUE self, VALUE str)
317317 StringValue (str );
318318
319319 if (rb_enc_str_asciicompat_p (str )) {
320- return optimized_escape_html (str );
320+ return optimized_escape_html (str );
321321 }
322322 else {
323- return rb_call_super (1 , & str );
323+ return rb_call_super (1 , & str );
324324 }
325325}
326326
@@ -337,10 +337,10 @@ cgiesc_unescape_html(VALUE self, VALUE str)
337337 StringValue (str );
338338
339339 if (rb_enc_str_asciicompat_p (str )) {
340- return optimized_unescape_html (str );
340+ return optimized_unescape_html (str );
341341 }
342342 else {
343- return rb_call_super (1 , & str );
343+ return rb_call_super (1 , & str );
344344 }
345345}
346346
@@ -357,18 +357,18 @@ cgiesc_escape(VALUE self, VALUE str)
357357 StringValue (str );
358358
359359 if (rb_enc_str_asciicompat_p (str )) {
360- return optimized_escape (str );
360+ return optimized_escape (str );
361361 }
362362 else {
363- return rb_call_super (1 , & str );
363+ return rb_call_super (1 , & str );
364364 }
365365}
366366
367367static VALUE
368368accept_charset (int argc , VALUE * argv , VALUE self )
369369{
370370 if (argc > 0 )
371- return argv [0 ];
371+ return argv [0 ];
372372 return rb_cvar_get (CLASS_OF (self ), id_accept_charset );
373373}
374374
@@ -387,11 +387,11 @@ cgiesc_unescape(int argc, VALUE *argv, VALUE self)
387387 StringValue (str );
388388
389389 if (rb_enc_str_asciicompat_p (str )) {
390- VALUE enc = accept_charset (argc - 1 , argv + 1 , self );
391- return optimized_unescape (str , enc );
390+ VALUE enc = accept_charset (argc - 1 , argv + 1 , self );
391+ return optimized_unescape (str , enc );
392392 }
393393 else {
394- return rb_call_super (argc , argv );
394+ return rb_call_super (argc , argv );
395395 }
396396}
397397
0 commit comments