1616// under the License.
1717
1818//! Regex expressions
19- use arrow:: array:: { Array , ArrayRef , OffsetSizeTrait } ;
19+ use arrow:: array:: { Array , ArrayRef , AsArray } ;
2020use arrow:: compute:: kernels:: regexp;
2121use arrow:: datatypes:: DataType ;
2222use arrow:: datatypes:: Field ;
2323use datafusion_common:: exec_err;
2424use datafusion_common:: ScalarValue ;
2525use datafusion_common:: { arrow_datafusion_err, plan_err} ;
26- use datafusion_common:: {
27- cast:: as_generic_string_array, internal_err, DataFusionError , Result ,
28- } ;
26+ use datafusion_common:: { DataFusionError , Result } ;
2927use datafusion_expr:: { ColumnarValue , Documentation , TypeSignature } ;
3028use datafusion_expr:: { ScalarUDFImpl , Signature , Volatility } ;
3129use datafusion_macros:: user_doc;
@@ -86,11 +84,12 @@ impl RegexpMatchFunc {
8684 signature : Signature :: one_of (
8785 vec ! [
8886 // Planner attempts coercion to the target type starting with the most preferred candidate.
89- // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8, Utf8)`.
90- // If that fails, it proceeds to `(LargeUtf8, Utf8)`.
91- // TODO: Native support Utf8View for regexp_match.
87+ // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`.
88+ // If that fails, it proceeds to `(Utf8, Utf8)`.
89+ TypeSignature :: Exact ( vec! [ Utf8View , Utf8View ] ) ,
9290 TypeSignature :: Exact ( vec![ Utf8 , Utf8 ] ) ,
9391 TypeSignature :: Exact ( vec![ LargeUtf8 , LargeUtf8 ] ) ,
92+ TypeSignature :: Exact ( vec![ Utf8View , Utf8View , Utf8View ] ) ,
9493 TypeSignature :: Exact ( vec![ Utf8 , Utf8 , Utf8 ] ) ,
9594 TypeSignature :: Exact ( vec![ LargeUtf8 , LargeUtf8 , LargeUtf8 ] ) ,
9695 ] ,
@@ -138,7 +137,7 @@ impl ScalarUDFImpl for RegexpMatchFunc {
138137 . map ( |arg| arg. to_array ( inferred_length) )
139138 . collect :: < Result < Vec < _ > > > ( ) ?;
140139
141- let result = regexp_match_func ( & args) ;
140+ let result = regexp_match ( & args) ;
142141 if is_scalar {
143142 // If all inputs are scalar, keeps output as scalar
144143 let result = result. and_then ( |arr| ScalarValue :: try_from_array ( & arr, 0 ) ) ;
@@ -153,33 +152,35 @@ impl ScalarUDFImpl for RegexpMatchFunc {
153152 }
154153}
155154
156- fn regexp_match_func ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
157- match args[ 0 ] . data_type ( ) {
158- DataType :: Utf8 => regexp_match :: < i32 > ( args) ,
159- DataType :: LargeUtf8 => regexp_match :: < i64 > ( args) ,
160- other => {
161- internal_err ! ( "Unsupported data type {other:?} for function regexp_match" )
162- }
163- }
164- }
165- pub fn regexp_match < T : OffsetSizeTrait > ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
155+ pub fn regexp_match ( args : & [ ArrayRef ] ) -> Result < ArrayRef > {
166156 match args. len ( ) {
167157 2 => {
168- let values = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
169- let regex = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
170- regexp:: regexp_match ( values, regex, None )
158+ regexp:: regexp_match ( & args[ 0 ] , & args[ 1 ] , None )
171159 . map_err ( |e| arrow_datafusion_err ! ( e) )
172160 }
173161 3 => {
174- let values = as_generic_string_array :: < T > ( & args[ 0 ] ) ?;
175- let regex = as_generic_string_array :: < T > ( & args[ 1 ] ) ?;
176- let flags = as_generic_string_array :: < T > ( & args[ 2 ] ) ?;
177-
178- if flags. iter ( ) . any ( |s| s == Some ( "g" ) ) {
179- return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
162+ match args[ 2 ] . data_type ( ) {
163+ DataType :: Utf8View => {
164+ if args[ 2 ] . as_string_view ( ) . iter ( ) . any ( |s| s == Some ( "g" ) ) {
165+ return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
166+ }
167+ }
168+ DataType :: Utf8 => {
169+ if args[ 2 ] . as_string :: < i32 > ( ) . iter ( ) . any ( |s| s == Some ( "g" ) ) {
170+ return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
171+ }
172+ }
173+ DataType :: LargeUtf8 => {
174+ if args[ 2 ] . as_string :: < i64 > ( ) . iter ( ) . any ( |s| s == Some ( "g" ) ) {
175+ return plan_err ! ( "regexp_match() does not support the \" global\" option" ) ;
176+ }
177+ }
178+ e => {
179+ return plan_err ! ( "regexp_match was called with unexpected data type {e:?}" ) ;
180+ }
180181 }
181182
182- regexp:: regexp_match ( values , regex , Some ( flags ) )
183+ regexp:: regexp_match ( & args [ 0 ] , & args [ 1 ] , Some ( & args [ 2 ] ) )
183184 . map_err ( |e| arrow_datafusion_err ! ( e) )
184185 }
185186 other => exec_err ! (
@@ -211,7 +212,7 @@ mod tests {
211212 expected_builder. append ( false ) ;
212213 let expected = expected_builder. finish ( ) ;
213214
214- let re = regexp_match :: < i32 > ( & [ Arc :: new ( values) , Arc :: new ( patterns) ] ) . unwrap ( ) ;
215+ let re = regexp_match ( & [ Arc :: new ( values) , Arc :: new ( patterns) ] ) . unwrap ( ) ;
215216
216217 assert_eq ! ( re. as_ref( ) , & expected) ;
217218 }
@@ -236,9 +237,8 @@ mod tests {
236237 expected_builder. append ( false ) ;
237238 let expected = expected_builder. finish ( ) ;
238239
239- let re =
240- regexp_match :: < i32 > ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
241- . unwrap ( ) ;
240+ let re = regexp_match ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
241+ . unwrap ( ) ;
242242
243243 assert_eq ! ( re. as_ref( ) , & expected) ;
244244 }
@@ -250,7 +250,7 @@ mod tests {
250250 let flags = StringArray :: from ( vec ! [ "g" ] ) ;
251251
252252 let re_err =
253- regexp_match :: < i32 > ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
253+ regexp_match ( & [ Arc :: new ( values) , Arc :: new ( patterns) , Arc :: new ( flags) ] )
254254 . expect_err ( "unsupported flag should have failed" ) ;
255255
256256 assert_eq ! ( re_err. strip_backtrace( ) , "Error during planning: regexp_match() does not support the \" global\" option" ) ;
0 commit comments