1717
1818//! temporal kernels
1919
20- use chrono:: { DateTime , Datelike , Duration , NaiveDate , Timelike , Utc } ;
20+ use chrono:: {
21+ DateTime , Datelike , Duration , LocalResult , NaiveDate , NaiveDateTime , TimeZone , Timelike , Utc ,
22+ } ;
2123
2224use std:: sync:: Arc ;
2325
@@ -153,10 +155,23 @@ where
153155 Ok ( ( ) )
154156}
155157
156- // Apply the Tz to the Naive Date Time,,convert to UTC, and return as microseconds in Unix epoch
158+ // Apply the Tz to the Naive Date Time, convert to UTC, and return as microseconds in Unix epoch.
159+ // After truncation the carried UTC offset may be wrong if the truncated time falls in a different
160+ // DST period than the original (e.g., truncating a December/PST timestamp to QUARTER yields
161+ // October 1 which is in PDT). We re-resolve the naive local time through the timezone so that
162+ // chrono picks the correct offset for the target date.
157163#[ inline]
158164fn as_micros_from_unix_epoch_utc ( dt : Option < DateTime < Tz > > ) -> i64 {
159- dt. unwrap ( ) . with_timezone ( & Utc ) . timestamp_micros ( )
165+ let dt = dt. unwrap ( ) ;
166+ let naive = dt. naive_local ( ) ;
167+ let tz = dt. timezone ( ) ;
168+
169+ match tz. from_local_datetime ( & naive) {
170+ LocalResult :: Single ( resolved) | LocalResult :: Ambiguous ( resolved, _) => {
171+ resolved. with_timezone ( & Utc ) . timestamp_micros ( )
172+ }
173+ LocalResult :: None => dt. with_timezone ( & Utc ) . timestamp_micros ( ) ,
174+ }
160175}
161176
162177#[ inline]
@@ -529,6 +544,89 @@ pub(crate) fn timestamp_trunc_dyn(
529544 }
530545}
531546
547+ /// Convert microseconds since epoch to NaiveDateTime
548+ #[ inline]
549+ fn micros_to_naive ( micros : i64 ) -> Option < NaiveDateTime > {
550+ DateTime :: from_timestamp_micros ( micros) . map ( |dt| dt. naive_utc ( ) )
551+ }
552+
553+ /// Convert NaiveDateTime back to microseconds since epoch
554+ #[ inline]
555+ fn naive_to_micros ( dt : NaiveDateTime ) -> i64 {
556+ dt. and_utc ( ) . timestamp_micros ( )
557+ }
558+
559+ /// Resolve a truncation format string to the corresponding NaiveDateTime truncation function.
560+ fn ntz_trunc_fn_for_format (
561+ format : & str ,
562+ ) -> Result < fn ( NaiveDateTime ) -> Option < NaiveDateTime > , SparkError > {
563+ match format. to_uppercase ( ) . as_str ( ) {
564+ "YEAR" | "YYYY" | "YY" => Ok ( trunc_date_to_year) ,
565+ "QUARTER" => Ok ( trunc_date_to_quarter) ,
566+ "MONTH" | "MON" | "MM" => Ok ( trunc_date_to_month) ,
567+ "WEEK" => Ok ( trunc_date_to_week) ,
568+ "DAY" | "DD" => Ok ( trunc_date_to_day) ,
569+ "HOUR" => Ok ( trunc_date_to_hour) ,
570+ "MINUTE" => Ok ( trunc_date_to_minute) ,
571+ "SECOND" => Ok ( trunc_date_to_second) ,
572+ "MILLISECOND" => Ok ( trunc_date_to_ms) ,
573+ "MICROSECOND" => Ok ( trunc_date_to_microsec) ,
574+ _ => Err ( SparkError :: Internal ( format ! (
575+ "Unsupported format: {format:?} for function 'timestamp_trunc'"
576+ ) ) ) ,
577+ }
578+ }
579+
580+ /// Truncate a TimestampNTZ array without any timezone conversion.
581+ /// NTZ values are timezone-independent; we treat the raw microseconds as a naive datetime.
582+ fn timestamp_trunc_ntz < T > (
583+ array : & PrimitiveArray < T > ,
584+ format : String ,
585+ ) -> Result < TimestampMicrosecondArray , SparkError >
586+ where
587+ T : ArrowTemporalType + ArrowNumericType ,
588+ i64 : From < T :: Native > ,
589+ {
590+ let trunc_fn = ntz_trunc_fn_for_format ( & format) ?;
591+
592+ let result: TimestampMicrosecondArray = array
593+ . iter ( )
594+ . map ( |opt_val| {
595+ opt_val. and_then ( |v| {
596+ let micros: i64 = v. into ( ) ;
597+ micros_to_naive ( micros)
598+ . and_then ( trunc_fn)
599+ . map ( naive_to_micros)
600+ } )
601+ } )
602+ . collect ( ) ;
603+
604+ Ok ( result)
605+ }
606+
607+ /// Truncate a single NTZ value and append to builder
608+ fn timestamp_trunc_ntz_single < F > (
609+ value : Option < i64 > ,
610+ builder : & mut PrimitiveBuilder < TimestampMicrosecondType > ,
611+ op : F ,
612+ ) -> Result < ( ) , SparkError >
613+ where
614+ F : Fn ( NaiveDateTime ) -> Option < NaiveDateTime > ,
615+ {
616+ match value {
617+ Some ( micros) => match micros_to_naive ( micros) . and_then ( op) {
618+ Some ( truncated) => builder. append_value ( naive_to_micros ( truncated) ) ,
619+ None => {
620+ return Err ( SparkError :: Internal (
621+ "Unable to truncate NTZ timestamp" . to_string ( ) ,
622+ ) )
623+ }
624+ } ,
625+ None => builder. append_null ( ) ,
626+ }
627+ Ok ( ( ) )
628+ }
629+
532630pub ( crate ) fn timestamp_trunc < T > (
533631 array : & PrimitiveArray < T > ,
534632 format : String ,
@@ -540,6 +638,10 @@ where
540638 let builder = TimestampMicrosecondBuilder :: with_capacity ( array. len ( ) ) ;
541639 let iter = ArrayIter :: new ( array) ;
542640 match array. data_type ( ) {
641+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) => {
642+ // TimestampNTZ: operate directly on naive microsecond values without timezone
643+ timestamp_trunc_ntz ( array, format)
644+ }
543645 DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( tz) ) => {
544646 match format. to_uppercase ( ) . as_str ( ) {
545647 "YEAR" | "YYYY" | "YY" => {
@@ -687,6 +789,15 @@ macro_rules! timestamp_trunc_array_fmt_helper {
687789 "lengths of values array and format array must be the same"
688790 ) ;
689791 match $datatype {
792+ DataType :: Timestamp ( TimeUnit :: Microsecond , None ) => {
793+ // TimestampNTZ: operate directly on naive microsecond values
794+ for ( index, val) in iter. enumerate( ) {
795+ let micros_val = val. map( |v| i64 :: from( v) ) ;
796+ let trunc_fn = ntz_trunc_fn_for_format( $formats. value( index) ) ?;
797+ timestamp_trunc_ntz_single( micros_val, & mut builder, trunc_fn) ?;
798+ }
799+ Ok ( builder. finish( ) )
800+ }
690801 DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( tz) ) => {
691802 let tz: Tz = tz. parse( ) ?;
692803 for ( index, val) in iter. enumerate( ) {
0 commit comments