@@ -166,6 +166,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
166166 | Time32 ( TimeUnit :: Millisecond )
167167 | Time64 ( TimeUnit :: Microsecond )
168168 | Time64 ( TimeUnit :: Nanosecond )
169+ | Timestamp ( TimeUnit :: Second , _)
170+ | Timestamp ( TimeUnit :: Millisecond , _)
171+ | Timestamp ( TimeUnit :: Microsecond , _)
169172 | Timestamp ( TimeUnit :: Nanosecond , _)
170173 ) => true ,
171174 ( Utf8 , _) => to_type. is_numeric ( ) && to_type != & Float16 ,
@@ -179,6 +182,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
179182 | Time32 ( TimeUnit :: Millisecond )
180183 | Time64 ( TimeUnit :: Microsecond )
181184 | Time64 ( TimeUnit :: Nanosecond )
185+ | Timestamp ( TimeUnit :: Second , _)
186+ | Timestamp ( TimeUnit :: Millisecond , _)
187+ | Timestamp ( TimeUnit :: Microsecond , _)
182188 | Timestamp ( TimeUnit :: Nanosecond , _)
183189 ) => true ,
184190 ( LargeUtf8 , _) => to_type. is_numeric ( ) && to_type != & Float16 ,
@@ -1141,8 +1147,17 @@ pub fn cast_with_options(
11411147 Time64 ( TimeUnit :: Nanosecond ) => {
11421148 cast_string_to_time64nanosecond :: < i32 > ( array, cast_options)
11431149 }
1150+ Timestamp ( TimeUnit :: Second , _) => {
1151+ cast_string_to_timestamp :: < i32 , TimestampSecondType > ( array, cast_options)
1152+ }
1153+ Timestamp ( TimeUnit :: Millisecond , _) => {
1154+ cast_string_to_timestamp :: < i32 , TimestampMillisecondType > ( array, cast_options)
1155+ }
1156+ Timestamp ( TimeUnit :: Microsecond , _) => {
1157+ cast_string_to_timestamp :: < i32 , TimestampMicrosecondType > ( array, cast_options)
1158+ }
11441159 Timestamp ( TimeUnit :: Nanosecond , _) => {
1145- cast_string_to_timestamp_ns :: < i32 > ( array, cast_options)
1160+ cast_string_to_timestamp :: < i32 , TimestampNanosecondType > ( array, cast_options)
11461161 }
11471162 _ => Err ( ArrowError :: CastError ( format ! (
11481163 "Casting from {from_type:?} to {to_type:?} not supported" ,
@@ -1182,8 +1197,17 @@ pub fn cast_with_options(
11821197 Time64 ( TimeUnit :: Nanosecond ) => {
11831198 cast_string_to_time64nanosecond :: < i64 > ( array, cast_options)
11841199 }
1200+ Timestamp ( TimeUnit :: Second , _) => {
1201+ cast_string_to_timestamp :: < i64 , TimestampSecondType > ( array, cast_options)
1202+ }
1203+ Timestamp ( TimeUnit :: Millisecond , _) => {
1204+ cast_string_to_timestamp :: < i64 , TimestampMillisecondType > ( array, cast_options)
1205+ }
1206+ Timestamp ( TimeUnit :: Microsecond , _) => {
1207+ cast_string_to_timestamp :: < i64 , TimestampMicrosecondType > ( array, cast_options)
1208+ }
11851209 Timestamp ( TimeUnit :: Nanosecond , _) => {
1186- cast_string_to_timestamp_ns :: < i64 > ( array, cast_options)
1210+ cast_string_to_timestamp :: < i64 , TimestampNanosecondType > ( array, cast_options)
11871211 }
11881212 _ => Err ( ArrowError :: CastError ( format ! (
11891213 "Casting from {from_type:?} to {to_type:?} not supported" ,
@@ -2552,8 +2576,11 @@ fn cast_string_to_time64nanosecond<Offset: OffsetSizeTrait>(
25522576 Ok ( Arc :: new ( array) as ArrayRef )
25532577}
25542578
2555- /// Casts generic string arrays to TimeStampNanosecondArray
2556- fn cast_string_to_timestamp_ns < Offset : OffsetSizeTrait > (
2579+ /// Casts generic string arrays to an ArrowTimestampType (TimestampNanosecondArray, etc.)
2580+ fn cast_string_to_timestamp <
2581+ Offset : OffsetSizeTrait ,
2582+ TimestampType : ArrowTimestampType < Native = i64 > ,
2583+ > (
25572584 array : & dyn Array ,
25582585 cast_options : & CastOptions ,
25592586) -> Result < ArrayRef , ArrowError > {
@@ -2562,26 +2589,36 @@ fn cast_string_to_timestamp_ns<Offset: OffsetSizeTrait>(
25622589 . downcast_ref :: < GenericStringArray < Offset > > ( )
25632590 . unwrap ( ) ;
25642591
2592+ let scale_factor = match TimestampType :: get_time_unit ( ) {
2593+ TimeUnit :: Second => 1_000_000_000 ,
2594+ TimeUnit :: Millisecond => 1_000_000 ,
2595+ TimeUnit :: Microsecond => 1_000 ,
2596+ TimeUnit :: Nanosecond => 1 ,
2597+ } ;
2598+
25652599 let array = if cast_options. safe {
2566- let iter = string_array
2567- . iter ( )
2568- . map ( |v| v . and_then ( |v| string_to_timestamp_nanos ( v ) . ok ( ) ) ) ;
2600+ let iter = string_array. iter ( ) . map ( |v| {
2601+ v . and_then ( |v| string_to_timestamp_nanos ( v ) . ok ( ) . map ( |t| t / scale_factor ) )
2602+ } ) ;
25692603 // Benefit:
25702604 // 20% performance improvement
25712605 // Soundness:
25722606 // The iterator is trustedLen because it comes from a `StringArray`.
2573- unsafe { TimestampNanosecondArray :: from_trusted_len_iter ( iter) }
2607+ unsafe { PrimitiveArray :: < TimestampType > :: from_trusted_len_iter ( iter) }
25742608 } else {
25752609 let vec = string_array
25762610 . iter ( )
2577- . map ( |v| v. map ( string_to_timestamp_nanos) . transpose ( ) )
2611+ . map ( |v| {
2612+ v. map ( |v| string_to_timestamp_nanos ( v) . map ( |t| t / scale_factor) )
2613+ . transpose ( )
2614+ } )
25782615 . collect :: < Result < Vec < Option < i64 > > , _ > > ( ) ?;
25792616
25802617 // Benefit:
25812618 // 20% performance improvement
25822619 // Soundness:
25832620 // The iterator is trustedLen because it comes from a `Vec` (`vec.iter()`).
2584- unsafe { TimestampNanosecondArray :: from_trusted_len_iter ( vec. iter ( ) ) }
2621+ unsafe { PrimitiveArray :: < TimestampType > :: from_trusted_len_iter ( vec. iter ( ) ) }
25852622 } ;
25862623
25872624 Ok ( Arc :: new ( array) as ArrayRef )
@@ -4704,32 +4741,69 @@ mod tests {
47044741 #[ test]
47054742 fn test_cast_string_to_timestamp ( ) {
47064743 let a1 = Arc :: new ( StringArray :: from ( vec ! [
4707- Some ( "2020-09-08T12:00:00+00:00" ) ,
4744+ Some ( "2020-09-08T12:00:00.123456789 +00:00" ) ,
47084745 Some ( "Not a valid date" ) ,
47094746 None ,
47104747 ] ) ) as ArrayRef ;
47114748 let a2 = Arc :: new ( LargeStringArray :: from ( vec ! [
4712- Some ( "2020-09-08T12:00:00+00:00" ) ,
4749+ Some ( "2020-09-08T12:00:00.123456789 +00:00" ) ,
47134750 Some ( "Not a valid date" ) ,
47144751 None ,
47154752 ] ) ) as ArrayRef ;
47164753 for array in & [ a1, a2] {
4717- let to_type = DataType :: Timestamp ( TimeUnit :: Nanosecond , None ) ;
4718- let b = cast ( array, & to_type) . unwrap ( ) ;
4719- let c = b
4720- . as_any ( )
4721- . downcast_ref :: < TimestampNanosecondArray > ( )
4722- . unwrap ( ) ;
4723- assert_eq ! ( 1599566400000000000 , c. value( 0 ) ) ;
4724- assert ! ( c. is_null( 1 ) ) ;
4725- assert ! ( c. is_null( 2 ) ) ;
4754+ for time_unit in & [
4755+ TimeUnit :: Second ,
4756+ TimeUnit :: Millisecond ,
4757+ TimeUnit :: Microsecond ,
4758+ TimeUnit :: Nanosecond ,
4759+ ] {
4760+ let to_type = DataType :: Timestamp ( time_unit. clone ( ) , None ) ;
4761+ let b = cast ( array, & to_type) . unwrap ( ) ;
4762+
4763+ match time_unit {
4764+ TimeUnit :: Second => {
4765+ let c =
4766+ b. as_any ( ) . downcast_ref :: < TimestampSecondArray > ( ) . unwrap ( ) ;
4767+ assert_eq ! ( 1599566400 , c. value( 0 ) ) ;
4768+ assert ! ( c. is_null( 1 ) ) ;
4769+ assert ! ( c. is_null( 2 ) ) ;
4770+ }
4771+ TimeUnit :: Millisecond => {
4772+ let c = b
4773+ . as_any ( )
4774+ . downcast_ref :: < TimestampMillisecondArray > ( )
4775+ . unwrap ( ) ;
4776+ assert_eq ! ( 1599566400123 , c. value( 0 ) ) ;
4777+ assert ! ( c. is_null( 1 ) ) ;
4778+ assert ! ( c. is_null( 2 ) ) ;
4779+ }
4780+ TimeUnit :: Microsecond => {
4781+ let c = b
4782+ . as_any ( )
4783+ . downcast_ref :: < TimestampMicrosecondArray > ( )
4784+ . unwrap ( ) ;
4785+ assert_eq ! ( 1599566400123456 , c. value( 0 ) ) ;
4786+ assert ! ( c. is_null( 1 ) ) ;
4787+ assert ! ( c. is_null( 2 ) ) ;
4788+ }
4789+ TimeUnit :: Nanosecond => {
4790+ let c = b
4791+ . as_any ( )
4792+ . downcast_ref :: < TimestampNanosecondArray > ( )
4793+ . unwrap ( ) ;
4794+ assert_eq ! ( 1599566400123456789 , c. value( 0 ) ) ;
4795+ assert ! ( c. is_null( 1 ) ) ;
4796+ assert ! ( c. is_null( 2 ) ) ;
4797+ }
4798+ }
47264799
4727- let options = CastOptions { safe : false } ;
4728- let err = cast_with_options ( array, & to_type, & options) . unwrap_err ( ) ;
4729- assert_eq ! (
4730- err. to_string( ) ,
4731- "Cast error: Error parsing 'Not a valid date' as timestamp"
4732- ) ;
4800+ let options = CastOptions { safe : false } ;
4801+ let err = cast_with_options ( array, & to_type, & options) . unwrap_err ( ) ;
4802+ assert_eq ! (
4803+ err. to_string( ) ,
4804+ "Cast error: Error parsing 'Not a valid date' as timestamp"
4805+ ) ;
4806+ }
47334807 }
47344808 }
47354809
0 commit comments