Skip to content

Commit bfd27b8

Browse files
feat: Rename metadata columns to use underscore prefix (_location, _last_modified, _size) (#138)
1 parent 96e89d9 commit bfd27b8

3 files changed

Lines changed: 55 additions & 55 deletions

File tree

datafusion/datasource/src/file_scan_config.rs

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2834,9 +2834,9 @@ mod tests {
28342834
// Verify the schema matches expected order
28352835
assert_eq!(result.num_columns(), 5);
28362836
assert_eq!(result.schema().field(0).name(), "data");
2837-
assert_eq!(result.schema().field(1).name(), "location");
2837+
assert_eq!(result.schema().field(1).name(), "_location");
28382838
assert_eq!(result.schema().field(2).name(), "year");
2839-
assert_eq!(result.schema().field(3).name(), "size");
2839+
assert_eq!(result.schema().field(3).name(), "_size");
28402840
assert_eq!(result.schema().field(4).name(), "month");
28412841

28422842
// Verify the values are correct in each column
@@ -2916,7 +2916,7 @@ mod tests {
29162916

29172917
assert_eq!(result.num_columns(), 3);
29182918
assert_eq!(result.schema().field(0).name(), "value");
2919-
assert_eq!(result.schema().field(1).name(), "size");
2919+
assert_eq!(result.schema().field(1).name(), "_size");
29202920
assert_eq!(result.schema().field(2).name(), "part");
29212921

29222922
// Verify values
@@ -2981,7 +2981,7 @@ mod tests {
29812981
assert_eq!(result.num_columns(), 3);
29822982
assert_eq!(result.schema().field(0).name(), "value");
29832983
assert_eq!(result.schema().field(1).name(), "part");
2984-
assert_eq!(result.schema().field(2).name(), "location");
2984+
assert_eq!(result.schema().field(2).name(), "_location");
29852985

29862986
let location_col = result
29872987
.column(2)
@@ -3065,10 +3065,10 @@ mod tests {
30653065
// Verify schema order
30663066
assert_eq!(result.num_columns(), 7);
30673067
assert_eq!(result.schema().field(0).name(), "col_a");
3068-
assert_eq!(result.schema().field(1).name(), "size");
3068+
assert_eq!(result.schema().field(1).name(), "_size");
30693069
assert_eq!(result.schema().field(2).name(), "col_b");
30703070
assert_eq!(result.schema().field(3).name(), "p1");
3071-
assert_eq!(result.schema().field(4).name(), "last_modified");
3071+
assert_eq!(result.schema().field(4).name(), "_last_modified");
30723072
assert_eq!(result.schema().field(5).name(), "col_c");
30733073
assert_eq!(result.schema().field(6).name(), "p2");
30743074

@@ -3212,8 +3212,8 @@ mod tests {
32123212

32133213
assert_eq!(result.num_columns(), 3);
32143214
assert_eq!(result.schema().field(0).name(), "data");
3215-
assert_eq!(result.schema().field(1).name(), "location");
3216-
assert_eq!(result.schema().field(2).name(), "size");
3215+
assert_eq!(result.schema().field(1).name(), "_location");
3216+
assert_eq!(result.schema().field(2).name(), "_size");
32173217

32183218
let location_col = result
32193219
.column(1)
@@ -3280,7 +3280,7 @@ mod tests {
32803280
assert_eq!(result.num_columns(), 2);
32813281
assert_eq!(result.num_rows(), 5);
32823282
assert_eq!(result.schema().field(0).name(), "part");
3283-
assert_eq!(result.schema().field(1).name(), "size");
3283+
assert_eq!(result.schema().field(1).name(), "_size");
32843284
}
32853285

32863286
#[test]
@@ -3759,12 +3759,12 @@ mod tests {
37593759
.projected_schema()
37603760
.expect("projected schema");
37613761
assert_eq!(projected.fields().len(), 3);
3762-
assert_eq!(projected.field(2).name(), "location");
3762+
assert_eq!(projected.field(2).name(), "_location");
37633763

37643764
// Create a filter on the metadata column: _location@2 = 's3://bucket'
37653765
// (index 2 because projected output is [id@0, value@1, _location@2])
37663766
let location_filter: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
3767-
Arc::new(Column::new("location", 2)),
3767+
Arc::new(Column::new("_location", 2)),
37683768
Operator::Eq,
37693769
Arc::new(Literal::new(ScalarValue::Utf8(Some(
37703770
"s3://bucket".to_string(),
@@ -3825,7 +3825,7 @@ mod tests {
38253825
));
38263826
// Filter 1: metadata column filter on _location@2 (beyond projection)
38273827
let location_filter: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
3828-
Arc::new(Column::new("location", 2)),
3828+
Arc::new(Column::new("_location", 2)),
38293829
Operator::Eq,
38303830
Arc::new(Literal::new(ScalarValue::Utf8(Some(
38313831
"s3://bucket".to_string(),
@@ -3884,8 +3884,8 @@ mod tests {
38843884
);
38853885
assert_eq!(schema.field(0).name(), "id");
38863886
assert_eq!(schema.field(1).name(), "value");
3887-
assert_eq!(schema.field(2).name(), "location");
3888-
assert_eq!(schema.field(3).name(), "size");
3887+
assert_eq!(schema.field(2).name(), "_location");
3888+
assert_eq!(schema.field(3).name(), "_size");
38893889
}
38903890

38913891
/// Same as above but with a projection applied — metadata columns must
@@ -3932,7 +3932,7 @@ mod tests {
39323932
);
39333933
assert_eq!(schema.field(0).name(), "id");
39343934
assert_eq!(schema.field(1).name(), "value");
3935-
assert_eq!(schema.field(2).name(), "location");
3935+
assert_eq!(schema.field(2).name(), "_location");
39363936

39373937
// Verify it matches projected_schema()
39383938
let projected = data_source
@@ -4043,7 +4043,7 @@ mod tests {
40434043
// metadata = [_location(2), _size(3)]
40444044
// SELECT _location, id -> projection = [2, 0]
40454045
// -> file_partition_indices = [0], metadata_positions = [(0, 0)]
4046-
// Metadata "location" should be at output position 0, "id" after it.
4046+
// Metadata "_location" should be at output position 0, "id" after it.
40474047
let config = FileScanConfigBuilder::new(object_store_url, file_source)
40484048
.with_metadata_cols(metadata_cols)
40494049
.with_projection_indices(Some(vec![2, 0]))
@@ -4151,9 +4151,9 @@ mod tests {
41514151
assert_eq!(projected.field(1).name(), "year");
41524152
assert_eq!(projected.field(2).name(), "month");
41534153
assert_eq!(projected.field(3).name(), "day");
4154-
assert_eq!(projected.field(4).name(), "location");
4155-
assert_eq!(projected.field(5).name(), "size");
4156-
assert_eq!(projected.field(6).name(), "last_modified");
4154+
assert_eq!(projected.field(4).name(), "_location");
4155+
assert_eq!(projected.field(5).name(), "_size");
4156+
assert_eq!(projected.field(6).name(), "_last_modified");
41574157
}
41584158

41594159
/// Test that projection with only some metadata columns works correctly.
@@ -4206,7 +4206,7 @@ mod tests {
42064206
);
42074207

42084208
assert_eq!(projected.field(0).name(), "id");
4209-
assert_eq!(projected.field(1).name(), "location");
4209+
assert_eq!(projected.field(1).name(), "_location");
42104210
}
42114211

42124212
/// Test that projection with metadata columns in non-sequential order works.
@@ -4253,9 +4253,9 @@ mod tests {
42534253
projected.fields().len()
42544254
);
42554255

4256-
assert_eq!(projected.field(0).name(), "location");
4256+
assert_eq!(projected.field(0).name(), "_location");
42574257
assert_eq!(projected.field(1).name(), "id");
4258-
assert_eq!(projected.field(2).name(), "size");
4258+
assert_eq!(projected.field(2).name(), "_size");
42594259
}
42604260

42614261
/// Test partial projection selecting file, partition, AND metadata columns.
@@ -4311,7 +4311,7 @@ mod tests {
43114311

43124312
assert_eq!(projected.field(0).name(), "id");
43134313
assert_eq!(projected.field(1).name(), "year");
4314-
assert_eq!(projected.field(2).name(), "location");
4314+
assert_eq!(projected.field(2).name(), "_location");
43154315

43164316
// Now test reordered: SELECT _location, year, id -> projection = [4, 2, 0]
43174317
let file_source2: Arc<dyn FileSource> =
@@ -4335,7 +4335,7 @@ mod tests {
43354335
projected2.fields().len()
43364336
);
43374337

4338-
assert_eq!(projected2.field(0).name(), "location");
4338+
assert_eq!(projected2.field(0).name(), "_location");
43394339
assert_eq!(projected2.field(1).name(), "year");
43404340
assert_eq!(projected2.field(2).name(), "id");
43414341
}
@@ -4380,7 +4380,7 @@ mod tests {
43804380
"Expected 1 column (location only), got {}",
43814381
projected.fields().len()
43824382
);
4383-
assert_eq!(projected.field(0).name(), "location");
4383+
assert_eq!(projected.field(0).name(), "_location");
43844384

43854385
// The file source should have an empty projection (no file/partition columns)
43864386
let source_proj = config
@@ -4398,7 +4398,7 @@ mod tests {
43984398
let binding = config.eq_properties();
43994399
let eq_schema = binding.schema();
44004400
assert_eq!(eq_schema.fields().len(), 1);
4401-
assert_eq!(eq_schema.field(0).name(), "location");
4401+
assert_eq!(eq_schema.field(0).name(), "_location");
44024402
}
44034403

44044404
/// Test that with_projection_indices correctly handles a mix of file,
@@ -4433,7 +4433,7 @@ mod tests {
44334433
let projected = config.projected_schema().expect("projected schema");
44344434
assert_eq!(projected.fields().len(), 3);
44354435
assert_eq!(projected.field(0).name(), "value");
4436-
assert_eq!(projected.field(1).name(), "location");
4436+
assert_eq!(projected.field(1).name(), "_location");
44374437
assert_eq!(projected.field(2).name(), "year");
44384438

44394439
// File source should have projection for indices [1, 2] (value, year)

datafusion/datasource/src/file_stream.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,8 +1055,8 @@ mod tests {
10551055
let schema_with_metadata = Arc::new(Schema::new(vec![
10561056
Field::new("id", DataType::Int32, false),
10571057
Field::new("value", DataType::Utf8, true),
1058-
Field::new("location", DataType::Utf8, true),
1059-
Field::new("size", DataType::UInt64, true),
1058+
Field::new("_location", DataType::Utf8, true),
1059+
Field::new("_size", DataType::UInt64, true),
10601060
]));
10611061

10621062
// Create the projector with metadata columns
@@ -1093,8 +1093,8 @@ mod tests {
10931093
Field::new("id", DataType::Int32, false),
10941094
Field::new("value", DataType::Utf8, true),
10951095
Field::new("year", DataType::Utf8, true),
1096-
Field::new("location", DataType::Utf8, true),
1097-
Field::new("size", DataType::UInt64, true),
1096+
Field::new("_location", DataType::Utf8, true),
1097+
Field::new("_size", DataType::UInt64, true),
10981098
]));
10991099

11001100
// Create the projector
@@ -1130,11 +1130,11 @@ mod tests {
11301130

11311131
// Create a schema with columns in a different order
11321132
let schema_mixed = Arc::new(Schema::new(vec![
1133-
Field::new("location", DataType::Utf8, true), // metadata column first
1133+
Field::new("_location", DataType::Utf8, true), // metadata column first
11341134
Field::new("id", DataType::Int32, false), // file column
11351135
Field::new("year", DataType::Utf8, true), // partition column
11361136
Field::new("value", DataType::Utf8, true), // file column
1137-
Field::new("size", DataType::UInt64, true), // metadata column last
1137+
Field::new("_size", DataType::UInt64, true), // metadata column last
11381138
]));
11391139

11401140
// Create the projector
@@ -1174,7 +1174,7 @@ mod tests {
11741174
.collect();
11751175

11761176
// Check location column
1177-
let location_idx = *field_indices.get("location").unwrap();
1177+
let location_idx = *field_indices.get("_location").unwrap();
11781178
let location_col = projected_batch.column(location_idx);
11791179
let location_array = location_col.as_any().downcast_ref::<StringArray>().unwrap();
11801180
assert_eq!(location_array.value(0), "test/file.parquet");
@@ -1201,7 +1201,7 @@ mod tests {
12011201
assert_eq!(value_array.value(0), "a");
12021202

12031203
// Check size column
1204-
let size_idx = *field_indices.get("size").unwrap();
1204+
let size_idx = *field_indices.get("_size").unwrap();
12051205
let size_col = projected_batch.column(size_idx);
12061206
let size_array = size_col.as_any().downcast_ref::<UInt64Array>().unwrap();
12071207
assert_eq!(size_array.value(0), 1024);

datafusion/datasource/src/metadata.rs

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ impl fmt::Display for MetadataColumn {
5050
}
5151

5252
impl MetadataColumn {
53-
/// The name of the metadata column (one of `location`, `last_modified`, or `size`)
53+
/// The name of the metadata column (one of `_location`, `_last_modified`, or `_size`)
5454
pub fn name(&self) -> &str {
5555
match self {
56-
MetadataColumn::Location(_) => "location",
57-
MetadataColumn::LastModified => "last_modified",
58-
MetadataColumn::Size => "size",
56+
MetadataColumn::Location(_) => "_location",
57+
MetadataColumn::LastModified => "_last_modified",
58+
MetadataColumn::Size => "_size",
5959
}
6060
}
6161

@@ -114,11 +114,11 @@ impl FromStr for MetadataColumn {
114114

115115
fn from_str(s: &str) -> Result<Self, Self::Err> {
116116
match s {
117-
"location" => Ok(MetadataColumn::Location(None)),
118-
"last_modified" => Ok(MetadataColumn::LastModified),
119-
"size" => Ok(MetadataColumn::Size),
117+
"_location" => Ok(MetadataColumn::Location(None)),
118+
"_last_modified" => Ok(MetadataColumn::LastModified),
119+
"_size" => Ok(MetadataColumn::Size),
120120
_ => plan_err!(
121-
"Invalid metadata column: {}, expected: location, last_modified, or size",
121+
"Invalid metadata column: {}, expected: _location, _last_modified, or _size",
122122
s
123123
),
124124
}
@@ -180,9 +180,9 @@ mod tests {
180180

181181
#[test]
182182
fn test_metadata_column_name() {
183-
assert_eq!(MetadataColumn::Location(None).name(), "location");
184-
assert_eq!(MetadataColumn::LastModified.name(), "last_modified");
185-
assert_eq!(MetadataColumn::Size.name(), "size");
183+
assert_eq!(MetadataColumn::Location(None).name(), "_location");
184+
assert_eq!(MetadataColumn::LastModified.name(), "_last_modified");
185+
assert_eq!(MetadataColumn::Size.name(), "_size");
186186
}
187187

188188
#[test]
@@ -198,20 +198,20 @@ mod tests {
198198
#[test]
199199
fn test_metadata_column_field() {
200200
let field = MetadataColumn::Location(None).field();
201-
assert_eq!(field.name(), "location");
201+
assert_eq!(field.name(), "_location");
202202
assert_eq!(field.data_type(), &DataType::Utf8);
203203
assert!(field.is_nullable());
204204

205205
let field = MetadataColumn::LastModified.field();
206-
assert_eq!(field.name(), "last_modified");
206+
assert_eq!(field.name(), "_last_modified");
207207
assert_eq!(
208208
field.data_type(),
209209
&DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into()))
210210
);
211211
assert!(field.is_nullable());
212212

213213
let field = MetadataColumn::Size.field();
214-
assert_eq!(field.name(), "size");
214+
assert_eq!(field.name(), "_size");
215215
assert_eq!(field.data_type(), &DataType::UInt64);
216216
assert!(field.is_nullable());
217217
}
@@ -247,15 +247,15 @@ mod tests {
247247
fn test_metadata_column_from_str() {
248248
// Test valid values
249249
assert_eq!(
250-
MetadataColumn::from_str("location").unwrap(),
250+
MetadataColumn::from_str("_location").unwrap(),
251251
MetadataColumn::Location(None)
252252
);
253253
assert_eq!(
254-
MetadataColumn::from_str("last_modified").unwrap(),
254+
MetadataColumn::from_str("_last_modified").unwrap(),
255255
MetadataColumn::LastModified
256256
);
257257
assert_eq!(
258-
MetadataColumn::from_str("size").unwrap(),
258+
MetadataColumn::from_str("_size").unwrap(),
259259
MetadataColumn::Size
260260
);
261261

@@ -266,9 +266,9 @@ mod tests {
266266

267267
#[test]
268268
fn test_metadata_column_display() {
269-
assert_eq!(format!("{}", MetadataColumn::Location(None)), "location");
270-
assert_eq!(format!("{}", MetadataColumn::LastModified), "last_modified");
271-
assert_eq!(format!("{}", MetadataColumn::Size), "size");
269+
assert_eq!(format!("{}", MetadataColumn::Location(None)), "_location");
270+
assert_eq!(format!("{}", MetadataColumn::LastModified), "_last_modified");
271+
assert_eq!(format!("{}", MetadataColumn::Size), "_size");
272272
}
273273

274274
#[test]

0 commit comments

Comments
 (0)