From 803e115fb35d3dde7f58ed3e238ca66eb80dbb8f Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 12 Aug 2025 19:24:01 +0800 Subject: [PATCH 1/4] [Variant] Add Variant::Time primitive and cast logic --- parquet-testing | 2 +- parquet-variant-compute/Cargo.toml | 2 +- .../src/cast_to_variant.rs | 163 ++++++++++++++++-- parquet-variant-json/src/to_json.rs | 35 +++- parquet-variant/src/builder.rs | 10 ++ parquet-variant/src/decoder.rs | 42 ++++- parquet-variant/src/variant.rs | 39 ++++- parquet-variant/tests/variant_interop.rs | 7 +- 8 files changed, 277 insertions(+), 23 deletions(-) diff --git a/parquet-testing b/parquet-testing index b68bea40fed8..5cbfc43d488c 160000 --- a/parquet-testing +++ b/parquet-testing @@ -1 +1 @@ -Subproject commit b68bea40fed8d1a780a9e09dd2262017e04b19ad +Subproject commit 5cbfc43d488c9c8404a1a7088cca400ae095b831 diff --git a/parquet-variant-compute/Cargo.toml b/parquet-variant-compute/Cargo.toml index 65ee0b33fc71..819a131f9c42 100644 --- a/parquet-variant-compute/Cargo.toml +++ b/parquet-variant-compute/Cargo.toml @@ -36,7 +36,7 @@ arrow-schema = { workspace = true } half = { version = "2.1", default-features = false } parquet-variant = { workspace = true } parquet-variant-json = { workspace = true } -chrono = {workspace = true} +chrono = { workspace = true } [lib] name = "parquet_variant_compute" diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 6c212e390211..781be0b5a651 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -20,19 +20,15 @@ use std::sync::Arc; use crate::{VariantArray, VariantArrayBuilder}; use arrow::array::{ Array, AsArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, -}; -use arrow::datatypes::{ - i256, BinaryType, BinaryViewType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, - Float16Type, Float32Type, Float64Type, 
Int16Type, Int32Type, Int64Type, Int8Type, - LargeBinaryType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, -}; + TimestampSecondArray}; +use arrow::datatypes::{i256, BinaryType, BinaryViewType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type}; use arrow::temporal_conversions::{ timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_s_to_datetime, timestamp_us_to_datetime, }; use arrow_schema::{ArrowError, DataType, TimeUnit}; use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; +use chrono::NaiveTime; use half::f16; use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; @@ -321,6 +317,75 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { DataType::Timestamp(time_unit, time_zone) => { convert_timestamp(time_unit, time_zone, input, &mut builder); } + DataType::Time32(unit) => { + match *unit { + TimeUnit::Second => { + generic_conversion!( + Time32SecondType, + as_primitive, + // nano second are always 0 + |v| NaiveTime::from_num_seconds_from_midnight_opt(v as u32, 0u32).unwrap(), + input, + builder + ); + } + TimeUnit::Millisecond => { + generic_conversion!( + Time32MillisecondType, + as_primitive, + |v| NaiveTime::from_num_seconds_from_midnight_opt( + v as u32 / 1000, + (v as u32 % 1000) * 1_000_000 + ) + .unwrap(), + input, + builder + ); + } + _ => { + return Err(ArrowError::CastError(format!( + "Unsupported Time32 unit: {:?}", + unit + ))); + } + }; + } + DataType::Time64(unit) => { + match *unit { + TimeUnit::Microsecond => { + generic_conversion!( + Time64MicrosecondType, + as_primitive, + |v| NaiveTime::from_num_seconds_from_midnight_opt( + (v / 1_000_000) as u32, + (v % 1_000_000 * 1_000) as u32 + ) + .unwrap(), + input, + builder + ); + } + TimeUnit::Nanosecond => { + 
generic_conversion!( + Time64NanosecondType, + as_primitive, + |v| NaiveTime::from_num_seconds_from_midnight_opt( + (v / 1_000_000_000) as u32, + (v % 1_000_000_000) as u32 + ) + .unwrap(), + input, + builder + ); + } + _ => { + return Err(ArrowError::CastError(format!( + "Unsupported Time64 unit: {:?}", + unit + ))); + } + }; + } dt => { return Err(ArrowError::CastError(format!( "Unsupported data type for casting to Variant: {dt:?}", @@ -337,17 +402,13 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ - ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, - FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder, - GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, - UInt16Array, UInt32Array, UInt64Array, UInt8Array, - }; + use arrow::array::{ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array}; use arrow_schema::{ DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, }; use parquet_variant::{Variant, VariantDecimal16}; use std::{sync::Arc, vec}; + use arrow::datatypes::i256; macro_rules! 
max_unscaled_value { (32, $precision:expr) => { @@ -1130,6 +1191,82 @@ mod tests { ) } + #[test] + fn test_cast_time32_second_to_variant_time() { + let array: Time32SecondArray = vec![Some(1), Some(86_399), None].into(); + let values = Arc::new(array); + run_test( + values, + vec![ + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(1, 0).unwrap(), + )), + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(86_399, 0).unwrap(), + )), + None, + ], + ) + } + + #[test] + fn test_cast_time32_millisecond_to_variant_time() { + let array: Time32MillisecondArray = vec![Some(123_456), Some(456_000), None].into(); + let values = Arc::new(array); + run_test( + values, + vec![ + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(123, 456_000_000).unwrap(), + )), + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(456, 0).unwrap(), + )), + None, + ], + ) + } + + #[test] + fn test_cast_time64_micro_to_variant_time() { + let array: Time64MicrosecondArray = vec![Some(1), Some(123_456_789), None].into(); + let values = Arc::new(array); + run_test( + values, + vec![ + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(0, 1_000).unwrap(), + )), + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(123, 456_789_000).unwrap(), + )), + None, + ], + ) + } + + #[test] + fn test_cast_time64_nano_to_variant_time() { + let array: Time64NanosecondArray = + vec![Some(1), Some(1001), Some(123_456_789_012), None].into(); + run_test( + Arc::new(array), + // as we can only represent microseconds, the sub-microsecond nanoseconds are rounded down to 0 + vec![ + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(0, 0).unwrap(), + )), + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(0, 1_000).unwrap(), + )), + Some(Variant::Time( + NaiveTime::from_num_seconds_from_midnight_opt(123, 456_789_000).unwrap(), + )), + None, + ], + ) + } + /// Converts the given `Array` to a 
`VariantArray` and tests the conversion /// against the expected values. It also tests the handling of nulls by /// setting one element to null and verifying the output. diff --git a/parquet-variant-json/src/to_json.rs b/parquet-variant-json/src/to_json.rs index a3ff04bcc99a..4734319bd130 100644 --- a/parquet-variant-json/src/to_json.rs +++ b/parquet-variant-json/src/to_json.rs @@ -18,11 +18,11 @@ //! Module for converting Variant data to JSON format use arrow_schema::ArrowError; use base64::{engine::general_purpose, Engine as _}; +use chrono::Timelike; +use parquet_variant::{Variant, VariantList, VariantObject}; use serde_json::Value; use std::io::Write; -use parquet_variant::{Variant, VariantList, VariantObject}; - // Format string constants to avoid duplication and reduce errors const DATE_FORMAT: &str = "%Y-%m-%d"; const TIMESTAMP_NTZ_FORMAT: &str = "%Y-%m-%dT%H:%M:%S%.6f"; @@ -40,6 +40,19 @@ fn format_binary_base64(bytes: &[u8]) -> String { general_purpose::STANDARD.encode(bytes) } +fn format_time_ntz_str(time: &chrono::NaiveTime) -> String { + let base = time.format("%H:%M:%S").to_string(); + let micros = time.nanosecond() / 1000; + match micros { + 0 => format!("{}.{}", base, 0), + _ => { + let micros_str = format!("{:06}", micros); + let micros_str_trimmed = micros_str.trim_end_matches('0'); // strip trailing zeros only: leading zeros are significant fraction digits (000123 us must print as .000123, not .123) + format!("{}.{}", base, micros_str_trimmed) + } + } +} + /// /// This function writes JSON directly to any type that implements [`Write`], /// making it efficient for streaming or when you want to control the output destination. @@ -110,6 +123,7 @@ pub fn variant_to_json(json_buffer: &mut impl Write, variant: &Variant) -> Resul Variant::TimestampNtzMicros(ts) => { write!(json_buffer, "\"{}\"", format_timestamp_ntz_string(ts))? 
} + Variant::Time(time) => write!(json_buffer, "\"{}\"", format_time_ntz_str(time))?, Variant::Binary(bytes) => { // Encode binary as base64 string let base64_str = format_binary_base64(bytes); @@ -348,6 +362,7 @@ pub fn variant_to_json_value(variant: &Variant) -> Result { Variant::Date(date) => Ok(Value::String(format_date_string(date))), Variant::TimestampMicros(ts) => Ok(Value::String(ts.to_rfc3339())), Variant::TimestampNtzMicros(ts) => Ok(Value::String(format_timestamp_ntz_string(ts))), + Variant::Time(time) => Ok(Value::String(format_time_ntz_str(time))), Variant::Binary(bytes) => Ok(Value::String(format_binary_base64(bytes))), Variant::String(s) => Ok(Value::String(s.to_string())), Variant::ShortString(s) => Ok(Value::String(s.to_string())), @@ -371,7 +386,7 @@ pub fn variant_to_json_value(variant: &Variant) -> Result { #[cfg(test)] mod tests { use super::*; - use chrono::{DateTime, NaiveDate, Utc}; + use chrono::{DateTime, NaiveDate, NaiveTime, Utc}; use parquet_variant::{VariantDecimal16, VariantDecimal4, VariantDecimal8}; #[test] @@ -457,6 +472,20 @@ mod tests { Ok(()) } + #[test] + fn test_time_to_json() -> Result<(), ArrowError> { + let naive_time = NaiveTime::from_num_seconds_from_midnight_opt(12345, 123460708).unwrap(); + let variant = Variant::Time(naive_time); + let json = variant_to_json_string(&variant)?; + assert!(json.contains("03:25:45.12346")); + assert!(json.starts_with('"') && json.ends_with('"')); + + let json_value = variant_to_json_value(&variant)?; + assert!(matches!(json_value, Value::String(_))); + println!("{:?}", json); + Ok(()) + } + #[test] fn test_binary_to_json() -> Result<(), ArrowError> { let binary_data = b"Hello, World!"; diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index b1607f8f306d..67890ac587b1 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -20,6 +20,7 @@ use crate::{ VariantMetadata, VariantObject, }; use arrow_schema::ArrowError; +use chrono::Timelike; 
use indexmap::{IndexMap, IndexSet}; use std::collections::HashSet; @@ -190,6 +191,13 @@ impl ValueBuffer { self.append_slice(&micros.to_le_bytes()); } + fn append_time_micros(&mut self, value: chrono::NaiveTime) { + self.append_primitive_header(VariantPrimitiveType::Time); + let micros_from_midnight = value.num_seconds_from_midnight() as u64 * 1_000_000 + + value.nanosecond() as u64 / 1_000; + self.append_slice(&micros_from_midnight.to_le_bytes()); + } + fn append_decimal4(&mut self, decimal4: VariantDecimal4) { self.append_primitive_header(VariantPrimitiveType::Decimal4); self.append_u8(decimal4.scale()); @@ -334,6 +342,7 @@ impl ValueBuffer { Variant::ShortString(s) => self.append_short_string(s), Variant::Object(obj) => self.append_object(metadata_builder, obj), Variant::List(list) => self.append_list(metadata_builder, list), + Variant::Time(v) => self.append_time_micros(v), } } @@ -364,6 +373,7 @@ impl ValueBuffer { Variant::ShortString(s) => self.append_short_string(s), Variant::Object(obj) => self.try_append_object(metadata_builder, obj)?, Variant::List(list) => self.try_append_list(metadata_builder, list)?, + Variant::Time(v) => self.append_time_micros(v), } Ok(()) diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs index 21069cdc02fc..5c46c1dc41b4 100644 --- a/parquet-variant/src/decoder.rs +++ b/parquet-variant/src/decoder.rs @@ -20,7 +20,7 @@ use crate::utils::{ use crate::ShortString; use arrow_schema::ArrowError; -use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc}; +use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, Utc}; /// The basic type of a [`Variant`] value, encoded in the first two bits of the /// header byte. 
@@ -63,6 +63,7 @@ pub enum VariantPrimitiveType { Float = 14, Binary = 15, String = 16, + Time = 17, } /// Extracts the basic type from a header byte @@ -104,6 +105,7 @@ impl TryFrom for VariantPrimitiveType { 14 => Ok(VariantPrimitiveType::Float), 15 => Ok(VariantPrimitiveType::Binary), 16 => Ok(VariantPrimitiveType::String), + 17 => Ok(VariantPrimitiveType::Time), _ => Err(ArrowError::InvalidArgumentError(format!( "unknown primitive type: {value}", ))), @@ -295,6 +297,25 @@ pub(crate) fn decode_timestampntz_micros(data: &[u8]) -> Result Result { + let micros_since_epoch = u64::from_le_bytes(array_from_slice(data, 0)?); + + let case_error = ArrowError::CastError(format!( + "Could not cast {micros_since_epoch} microseconds into a NaiveTime" + )); + + if micros_since_epoch >= 86_400_000_000 { + return Err(case_error); + } + + let nanos_since_midnight = micros_since_epoch * 1_000; + NaiveTime::from_num_seconds_from_midnight_opt( + (nanos_since_midnight / 1_000_000_000) as u32, + (nanos_since_midnight % 1_000_000_000) as u32, + ) + .ok_or(case_error) +} + /// Decodes a Binary from the value section of a variant. pub(crate) fn decode_binary(data: &[u8]) -> Result<&[u8], ArrowError> { let len = u32::from_le_bytes(array_from_slice(data, 0)?) 
as usize; @@ -441,6 +462,25 @@ mod tests { ); } + mod time { + use super::*; + + test_decoder_bounds!( + test_timentz, + [0x53, 0x1f, 0x8e, 0xdf, 0x2, 0, 0, 0], + decode_time_ntz, + NaiveTime::from_num_seconds_from_midnight_opt(12340, 567891_000).unwrap() + ); + + #[test] + fn test_decode_time_ntz_invalid() { + let invalid_second = u64::MAX; + let data = invalid_second.to_le_bytes(); + let result = decode_time_ntz(&data); + assert!(matches!(result, Err(ArrowError::CastError(_)))); + } + } + #[test] fn test_binary_exact_length() { let data = [ diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 82de637b0697..9d5ea0155d32 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -27,7 +27,7 @@ use crate::utils::{first_byte_from_slice, slice_from_slice}; use std::ops::Deref; use arrow_schema::ArrowError; -use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; +use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc}; mod decimal; mod list; @@ -248,6 +248,8 @@ pub enum Variant<'m, 'v> { Binary(&'v [u8]), /// Primitive (type_id=1): STRING String(&'v str), + /// Primitive (type_id=1): TIME(isAdjustedToUTC=false, MICROS) + Time(NaiveTime), /// Short String (type_id=2): STRING ShortString(ShortString<'v>), // need both metadata & value @@ -385,6 +387,7 @@ impl<'m, 'v> Variant<'m, 'v> { VariantPrimitiveType::String => { Variant::String(decoder::decode_long_string(value_data)?) } + VariantPrimitiveType::Time => Variant::Time(decoder::decode_time_ntz(value_data)?), }, VariantBasicType::ShortString => { Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?) @@ -1030,6 +1033,34 @@ impl<'m, 'v> Variant<'m, 'v> { } } + /// Converts this variant to a `NaiveTime` if possible. + /// + /// Returns `Some(NaiveTime)` for `Variant::Time`, + /// `None` for non-Time variants. 
+ /// + /// # Example + /// + /// ``` + /// use chrono::NaiveTime; + /// use parquet_variant::Variant; + /// + /// // you can extract a `NaiveTime` from a `Variant::Time` + /// let time = NaiveTime::from_hms_micro_opt(1, 2, 3, 4).unwrap(); + /// let v1 = Variant::from(time); + /// assert_eq!(Some(time), v1.as_time_utc()); + /// + /// // but not from other variants. + /// let v2 = Variant::from("Hello"); + /// assert_eq!(None, v2.as_time_utc()); + /// ``` + pub fn as_time_utc(&'m self) -> Option<NaiveTime> { + if let Variant::Time(time) = self { + Some(*time) + } else { + None + } + } + /// If this is a list and the requested index is in bounds, retrieves the corresponding /// element. Otherwise, returns None. /// @@ -1246,6 +1277,12 @@ impl<'v> From<&'v [u8]> for Variant<'_, 'v> { } } +impl From<NaiveTime> for Variant<'_, '_> { + fn from(value: NaiveTime) -> Self { + Variant::Time(value) + } +} + impl<'v> From<&'v str> for Variant<'_, 'v> { fn from(value: &'v str) -> Self { if value.len() > MAX_SHORT_STRING_BYTES { diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs index e37172a7d568..1c5b8ed221a6 100644 --- a/parquet-variant/tests/variant_interop.rs +++ b/parquet-variant/tests/variant_interop.rs @@ -21,7 +21,7 @@ use std::path::{Path, PathBuf}; use std::{env, fs}; -use chrono::NaiveDate; +use chrono::{NaiveDate, NaiveTime}; use parquet_variant::{ ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, }; @@ -112,9 +112,9 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> { ("primitive_boolean_false", Variant::BooleanFalse), ("primitive_boolean_true", Variant::BooleanTrue), ("primitive_date", Variant::Date(NaiveDate::from_ymd_opt(2025, 4 , 16).unwrap())), - ("primitive_decimal4", Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap())), + ("primitive_decimal4", Variant::from(VariantDecimal4::try_new(1234i32, 2u8).unwrap())), // ("primitive_decimal8", 
Variant::Decimal8{integer: 1234567890, scale: 2}), - ("primitive_decimal8", Variant::Decimal8(VariantDecimal8::try_new(1234567890,2).unwrap())), + ("primitive_decimal8", Variant::Decimal8(VariantDecimal8::try_new(1234567890,2).unwrap())), ("primitive_decimal16", Variant::Decimal16(VariantDecimal16::try_new(1234567891234567890, 2).unwrap())), ("primitive_float", Variant::Float(1234567890.1234)), ("primitive_double", Variant::Double(1234567890.1234)), @@ -127,6 +127,7 @@ fn get_primitive_cases() -> Vec<(&'static str, Variant<'static, 'static>)> { ("primitive_timestamp", Variant::TimestampMicros(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(16, 34, 56, 780).unwrap().and_utc())), ("primitive_timestampntz", Variant::TimestampNtzMicros(NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap())), ("short_string", Variant::ShortString(ShortString::try_new("Less than 64 bytes (❤\u{fe0f} with utf8)").unwrap())), + ("primitive_time", Variant::Time(NaiveTime::from_hms_micro_opt(12, 33, 54, 123456).unwrap())), ] } #[test] From 16b2cad15479045d8ccf528341b5b870af1181bb Mon Sep 17 00:00:00 2001 From: klion26 Date: Tue, 12 Aug 2025 20:43:33 +0800 Subject: [PATCH 2/4] fix style --- parquet-variant/src/decoder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs index 5c46c1dc41b4..ff870596e4de 100644 --- a/parquet-variant/src/decoder.rs +++ b/parquet-variant/src/decoder.rs @@ -469,7 +469,7 @@ mod tests { test_timentz, [0x53, 0x1f, 0x8e, 0xdf, 0x2, 0, 0, 0], decode_time_ntz, - NaiveTime::from_num_seconds_from_midnight_opt(12340, 567891_000).unwrap() + NaiveTime::from_num_seconds_from_midnight_opt(12340, 567_891_000).unwrap() ); #[test] From 8a522b6d42e4733152adfc9110988b70e6b5f692 Mon Sep 17 00:00:00 2001 From: klion26 Date: Wed, 13 Aug 2025 16:27:22 +0800 Subject: [PATCH 3/4] update test --- .../src/cast_to_variant.rs | 22 ++++++++++++++----- 
parquet-variant-json/src/to_json.rs | 4 +--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/parquet-variant-compute/src/cast_to_variant.rs b/parquet-variant-compute/src/cast_to_variant.rs index 781be0b5a651..bbd696d5fbdb 100644 --- a/parquet-variant-compute/src/cast_to_variant.rs +++ b/parquet-variant-compute/src/cast_to_variant.rs @@ -20,15 +20,21 @@ use std::sync::Arc; use crate::{VariantArray, VariantArrayBuilder}; use arrow::array::{ Array, AsArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray}; -use arrow::datatypes::{i256, BinaryType, BinaryViewType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LargeBinaryType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type}; + TimestampSecondArray, +}; +use arrow::datatypes::{ + i256, BinaryType, BinaryViewType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, + Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, + LargeBinaryType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType, + Time64NanosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; use arrow::temporal_conversions::{ timestamp_ms_to_datetime, timestamp_ns_to_datetime, timestamp_s_to_datetime, timestamp_us_to_datetime, }; use arrow_schema::{ArrowError, DataType, TimeUnit}; -use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; use chrono::NaiveTime; +use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; use half::f16; use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8}; @@ -402,13 +408,19 @@ pub fn cast_to_variant(input: &dyn Array) -> Result { #[cfg(test)] mod tests { use super::*; - use arrow::array::{ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, FixedSizeBinaryBuilder, 
Float16Array, Float32Array, Float64Array, GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array}; + use arrow::array::{ + ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, + FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder, + GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, NullArray, + Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, + }; + use arrow::datatypes::i256; use arrow_schema::{ DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION, }; use parquet_variant::{Variant, VariantDecimal16}; use std::{sync::Arc, vec}; - use arrow::datatypes::i256; macro_rules! max_unscaled_value { (32, $precision:expr) => { diff --git a/parquet-variant-json/src/to_json.rs b/parquet-variant-json/src/to_json.rs index 4734319bd130..e18f3b327c8d 100644 --- a/parquet-variant-json/src/to_json.rs +++ b/parquet-variant-json/src/to_json.rs @@ -477,12 +477,10 @@ mod tests { let naive_time = NaiveTime::from_num_seconds_from_midnight_opt(12345, 123460708).unwrap(); let variant = Variant::Time(naive_time); let json = variant_to_json_string(&variant)?; - assert!(json.contains("03:25:45.12346")); - assert!(json.starts_with('"') && json.ends_with('"')); + assert_eq!("\"03:25:45.12346\"", json); let json_value = variant_to_json_value(&variant)?; assert!(matches!(json_value, Value::String(_))); - println!("{:?}", json); Ok(()) } From afbcb4b485538b4f92dc4929113547186bd2f8b8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Aug 2025 15:37:53 -0400 Subject: [PATCH 4/4] Only install cargo-msrv if needed --- .github/workflows/rust.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff 
--git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 5b95c7f6359c..9cd33b296da1 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -116,8 +116,9 @@ jobs: - uses: actions/checkout@v5 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - - name: Install cargo-msrv - run: cargo install cargo-msrv + - name: Install cargo-msrv (if needed) + # cargo-msrv binary may be cached by the cargo cache step in setup-builder, and cargo install will error if it is already installed + run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv ; fi - name: Check all packages run: | # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies