@@ -231,112 +231,3 @@ fn assert_metadata(batches: &[RecordBatch], expected_metadata: &HashMap<String,
231231 assert_eq ! ( batch. schema( ) . metadata( ) , expected_metadata, ) ;
232232 }
233233}
234-
235- #[ tokio:: test]
236- async fn infer_schema_from_gzip_parquet ( ) {
237- // Test schema inference from a gzip-compressed parquet file
238- let file_path = "../../../datafusion-benchmarks/tpcds/data/sf1/web_site.parquet" ;
239-
240- // Check if the file exists
241- if !Path :: new ( file_path) . exists ( ) {
242- eprintln ! ( "Skipping test: file not found at {}" , file_path) ;
243- return ;
244- }
245-
246- let ctx = SessionContext :: new ( ) ;
247-
248- // Read the parquet file and infer schema
249- let df = ctx
250- . read_parquet ( file_path, ParquetReadOptions :: default ( ) )
251- . await
252- . expect ( "Failed to read parquet file" ) ;
253-
254- let schema = df. schema ( ) ;
255-
256- // Verify that schema was successfully inferred
257- assert ! (
258- !schema. fields( ) . is_empty( ) ,
259- "Schema should have at least one field"
260- ) ;
261-
262- // Print schema for debugging
263- println ! ( "Inferred schema from gzip parquet file:" ) ;
264- for field in schema. fields ( ) {
265- println ! ( " - {}: {:?}" , field. name( ) , field. data_type( ) ) ;
266- }
267-
268- // Verify we can actually read data from the file
269- let results = df. collect ( ) . await . expect ( "Failed to collect results" ) ;
270-
271- // Verify we got some data
272- let total_rows: usize = results. iter ( ) . map ( |batch| batch. num_rows ( ) ) . sum ( ) ;
273- println ! ( "Total rows read: {}" , total_rows) ;
274-
275- assert ! (
276- total_rows > 0 ,
277- "Should have read at least one row from the file"
278- ) ;
279- }
280-
281- #[ tokio:: test]
282- async fn infer_schema_from_gzip_parquet_with_listing_options ( ) {
283- use datafusion:: datasource:: file_format:: parquet:: ParquetFormat ;
284- use datafusion:: datasource:: listing:: { ListingOptions , ListingTableUrl } ;
285- use datafusion_common:: file_options:: file_type:: DEFAULT_PARQUET_EXTENSION ;
286-
287- // Test schema inference using ListingOptions and ParquetFormat
288- let file_path = "../../../datafusion-benchmarks/tpcds/data/sf1/web_site.parquet" ;
289-
290- // Check if the file exists
291- if !Path :: new ( file_path) . exists ( ) {
292- eprintln ! ( "Skipping test: file not found at {}" , file_path) ;
293- return ;
294- }
295-
296- let ctx = SessionContext :: new ( ) ;
297- let state = ctx. state ( ) ;
298-
299- // Create ParquetFormat with options from the session state
300- let format = ParquetFormat :: default ( )
301- . with_options ( state. table_options ( ) . parquet . clone ( ) ) ;
302-
303- // Parse the file path as a ListingTableUrl
304- let table_path = ListingTableUrl :: parse ( file_path)
305- . expect ( "Failed to parse table path" ) ;
306-
307- // Create ListingOptions with the ParquetFormat
308- let options = ListingOptions :: new ( Arc :: new ( format) )
309- . with_file_extension ( DEFAULT_PARQUET_EXTENSION )
310- . with_target_partitions ( state. config ( ) . target_partitions ( ) )
311- . with_collect_stat ( state. config ( ) . collect_statistics ( ) ) ;
312-
313- // Infer schema using the ListingOptions
314- let schema = options
315- . infer_schema ( & state, & table_path)
316- . await
317- . expect ( "Failed to infer schema" ) ;
318-
319- // Verify that schema was successfully inferred
320- assert ! (
321- !schema. fields( ) . is_empty( ) ,
322- "Schema should have at least one field"
323- ) ;
324-
325- // Print schema for debugging
326- println ! ( "Inferred schema using ListingOptions:" ) ;
327- for field in schema. fields ( ) {
328- println ! ( " - {}: {:?}" , field. name( ) , field. data_type( ) ) ;
329- }
330-
331- // Verify expected number of fields for web_site table
332- assert_eq ! (
333- schema. fields( ) . len( ) ,
334- 26 ,
335- "web_site table should have 26 fields"
336- ) ;
337-
338- // Verify some specific fields exist
339- assert ! ( schema. field_with_name( "web_site_sk" ) . is_ok( ) ) ;
340- assert ! ( schema. field_with_name( "web_site_id" ) . is_ok( ) ) ;
341- assert ! ( schema. field_with_name( "web_name" ) . is_ok( ) ) ;
342- }
0 commit comments