Skip to content

Commit 1de6523

Browse files
authored
regex: bugfix for #18363, [^\s]+ act different from \S+ (#18371)
1 parent 5300441 commit 1de6523

2 files changed

Lines changed: 79 additions & 1 deletion

File tree

vlib/regex/regex.v

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,77 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
22022202
}
22032203
// char class IST
22042204
else if ist == regex.ist_char_class_pos || ist == regex.ist_char_class_neg {
2205+
// flag holding the result of the check against the next token; starts as false
2206+
mut next_check_flag := false
2207+
2208+
// if we have reached the max repetitions, go on to the next instruction (dot chars are a dedicated case)
2209+
if re.prog[state.pc].rep >= re.prog[state.pc].rep_max {
2210+
re.state_list.pop()
2211+
m_state = .ist_next
2212+
continue
2213+
}
2214+
2215+
if re.prog[state.pc].last_dot_flag == false && re.prog[state.pc].cc_check_pc >= 0
2216+
&& re.prog[state.pc].rep >= re.prog[state.pc].rep_min {
2217+
// load the char
2218+
// ch_t, _ := re.get_charb(in_txt, state.i+char_len)
2219+
ch_t := ch
2220+
chk_pc := re.prog[state.pc].cc_check_pc
2221+
2222+
// simple char
2223+
if re.prog[chk_pc].ist == regex.ist_simple_char {
2224+
if re.prog[chk_pc].ch == ch_t {
2225+
next_check_flag = true
2226+
}
2227+
// println("Check [ist_simple_char] [${re.prog[chk_pc].ch}]==[${ch_t:c}] => $next_check_flag")
2228+
}
2229+
// char class
2230+
else if re.prog[chk_pc].ist == regex.ist_char_class_pos
2231+
|| re.prog[chk_pc].ist == regex.ist_char_class_neg {
2232+
mut cc_neg := false
2233+
if re.prog[chk_pc].ist == regex.ist_char_class_neg {
2234+
cc_neg = true
2235+
}
2236+
mut cc_res := re.check_char_class(chk_pc, ch_t)
2237+
2238+
if cc_neg {
2239+
cc_res = !cc_res
2240+
}
2241+
next_check_flag = cc_res
2242+
// println("Check [ist_char_class] => $next_check_flag")
2243+
}
2244+
// check bsls
2245+
else if re.prog[chk_pc].ist == regex.ist_bsls_char {
2246+
next_check_flag = re.prog[chk_pc].validator(u8(ch_t))
2247+
// println("Check [ist_bsls_char] => $next_check_flag")
2248+
}
2249+
}
2250+
2251+
// check if we must continue or pass to the next IST
2252+
if next_check_flag == true && re.prog[state.pc + 1].ist != regex.ist_prog_end {
2253+
// println("save the state!!")
2254+
mut dot_state := StateObj{
2255+
group_index: state.group_index
2256+
match_flag: state.match_flag
2257+
match_index: state.match_index
2258+
first_match: state.first_match
2259+
pc: state.pc
2260+
i: state.i + char_len
2261+
char_len: char_len
2262+
last_dot_pc: state.pc
2263+
}
2264+
// if we are managing a [something]* (rep_min == 0), stay on the same char on return
2265+
if re.prog[state.pc].rep_min == 0 {
2266+
dot_state.i -= char_len
2267+
}
2268+
2269+
re.state_list << dot_state
2270+
2271+
m_state = .ist_quant_n
2272+
// println("dot_char stack len: ${re.state_list.len}")
2273+
continue
2274+
}
2275+
22052276
state.match_flag = false
22062277
mut cc_neg := false
22072278

vlib/regex/regex_test.v

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ match_test_suite = [
105105
TestItem{"this cpapaz adce aabe third",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
106106
TestItem{"1234this cpapaz adce aabe ter",r"(c(pa)+z)(\s[\a]+){2}$",-1,0},
107107
TestItem{"cpapaz ole. pipipo,",r"^.*c.+ol?e.*p([ip])+o$",-1,0},
108-
TestItem{"/home/us_er/pippo/info-01.jpeg", r"(/?[-\w_]+)*\.txt$",-1,26}
109108

110109
// check unicode
111110
TestItem{"this is a Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ test",r".*a [Ⅰ-Ⅵ ]+",0,34},
@@ -174,6 +173,14 @@ match_test_suite = [
174173
TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(.*)",0,26},
175174
TestItem{"refs/remotes/origin/mastep", r"refs/remotes/origin/(\w*)",0,26},
176175
TestItem{"refs/remotes/origin/master", r"refs/remotes/origin/(\w*)",0,26},
176+
177+
// test \S+ vs [^\s]+
178+
TestItem{"ab.c", r"\S+\.",0,3},
179+
TestItem{"ab.c", r"[^\s]+\.",0,3},
180+
TestItem{"ab.c", r"\S*\.",0,3},
181+
TestItem{"ab.c", r"[^\s]*\.",0,3},
182+
TestItem{"ab c", r"[\S]+\s",0,3},
183+
TestItem{"ab c", r"[^\s]+\s",0,3},
177184
]
178185
)
179186

0 commit comments

Comments
 (0)