Skip to content

Commit 334f722

Browse files
committed
Add heap_size to statistics
This adds a heap_size method returning the amount of memory a statistics struct allocates on the heap.
1 parent 9a9ff8d commit 334f722

2 files changed

Lines changed: 60 additions & 2 deletions

File tree

datafusion/common/src/stats.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,13 @@ impl Statistics {
321321
}
322322
}
323323

324+
/// Returns the memory size in bytes.
///
/// The result is the allocated capacity of `column_statistics` multiplied by
/// `size_of::<ColumnStatistics>()`, plus two `size_of::<Precision<usize>>()`
/// terms accounted to `num_rows` and `total_byte_size`.
325+
pub fn heap_size(&self) -> usize {
326+
// column_statistics + num_rows + total_byte_size
// `capacity()` rather than `len()` is used, so reserved-but-unused slots are counted.
// Only the shallow `size_of` of each slot is added; nothing reachable from inside a
// `ColumnStatistics` value is measured here.
// NOTE(review): `num_rows` / `total_byte_size` look like inline fields rather than heap
// allocations — confirm that including them in a "heap" size is intended.
327+
self.column_statistics.capacity() * size_of::<ColumnStatistics>()
328+
+ size_of::<Precision<usize>>() * 2
329+
}
330+
324331
/// Calculates `total_byte_size` based on the schema and `num_rows`.
325332
/// If any of the columns has non-primitive width, `total_byte_size` is set to inexact.
326333
pub fn calculate_total_byte_size(&mut self, schema: &Schema) {
@@ -1757,4 +1764,55 @@ mod tests {
17571764
// total_byte_size should fall back to scaling: 8000 * 0.1 = 800
17581765
assert_eq!(result.total_byte_size, Precision::Inexact(800));
17591766
}
1767+
1768+
// Exercises `Statistics::heap_size` with zero, one and two `ColumnStatistics` entries.
#[test]
1769+
fn test_statistics_heap_size() {
1770+
1771+
let stats = Statistics {
1772+
num_rows: Precision::Exact(100),
1773+
total_byte_size: Precision::Exact(100),
1774+
column_statistics: vec![],
1775+
};
1776+
1777+
// Empty vector: only the two `size_of::<Precision<usize>>()` terms contribute (32 expected).
assert_eq!(stats.heap_size(), 32);
1778+
1779+
let stats = Statistics {
1780+
num_rows: Precision::Exact(100),
1781+
total_byte_size: Precision::Exact(100),
1782+
column_statistics: vec![ColumnStatistics {
1783+
null_count: Precision::Absent,
1784+
max_value: Precision::Absent,
1785+
min_value: Precision::Absent,
1786+
sum_value: Precision::Absent,
1787+
distinct_count: Precision::Absent,
1788+
byte_size: Precision::Exact(100),
1789+
}],
1790+
};
1791+
1792+
// One entry: 320 - 32 = 288 bytes attributed to the single `ColumnStatistics` slot.
// NOTE(review): these literals hard-code the current type sizes — presumably they must be
// updated if the layout of `ColumnStatistics` or `Precision` changes; verify on other targets.
assert_eq!(stats.heap_size(), 320);
1793+
1794+
let stats = Statistics {
1795+
num_rows: Precision::Exact(100),
1796+
total_byte_size: Precision::Exact(100),
1797+
column_statistics: vec![
1798+
ColumnStatistics {
1799+
null_count: Precision::Absent,
1800+
max_value: Precision::Absent,
1801+
min_value: Precision::Absent,
1802+
sum_value: Precision::Absent,
1803+
distinct_count: Precision::Absent,
1804+
byte_size: Precision::Exact(100),
1805+
},
1806+
ColumnStatistics {
1807+
null_count: Precision::Exact(10),
1808+
max_value: Precision::Absent,
1809+
min_value: Precision::Absent,
1810+
sum_value: Precision::Absent,
1811+
distinct_count: Precision::Absent,
1812+
byte_size: Precision::Exact(100),
1813+
},
1814+
],
1815+
};
1816+
// Two entries: 32 + 2 * 288 = 608; the differing `null_count` value does not change the size.
assert_eq!(stats.heap_size(), 608);
1817+
}
17601818
}

datafusion/execution/src/cache/cache_unit.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache {
5555
num_rows: stats.num_rows,
5656
num_columns: stats.column_statistics.len(),
5757
table_size_bytes: stats.total_byte_size,
58-
statistics_size_bytes: 0, // TODO: set to the real size in the future
58+
statistics_size_bytes: stats.heap_size(),
5959
},
6060
);
6161
}
@@ -196,7 +196,7 @@ mod tests {
196196
num_rows: Precision::Absent,
197197
num_columns: 1,
198198
table_size_bytes: Precision::Absent,
199-
statistics_size_bytes: 0,
199+
statistics_size_bytes: 320,
200200
}
201201
)])
202202
);

0 commit comments

Comments
 (0)