Skip to content

Commit 45cd6c4

Browse files
committed
feat: add metric_doc macro to existing metric and exec structs
1 parent 31e2b0c commit 45cd6c4

25 files changed

Lines changed: 313 additions & 57 deletions

File tree

Cargo.lock

Lines changed: 35 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,9 @@ sysinfo = "0.37.2"
185185
test-utils = { path = "../../test-utils" }
186186
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot", "fs"] }
187187

188+
[build-dependencies]
189+
toml = "0.9"
190+
188191
[package.metadata.cargo-machete]
189192
ignored = ["datafusion-doc", "datafusion-macros", "dashmap"]
190193

datafusion/core/build.rs

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use std::env;
1919
use std::fs;
2020
use std::io::{Read, Write};
2121
use std::path::Path;
22+
use toml::value::Table;
2223

2324
fn main() {
2425
let out_dir = env::var("OUT_DIR").unwrap();
@@ -34,10 +35,13 @@ fn main() {
3435

3536
// Read Cargo.toml to check dependencies
3637
let cargo_toml_path = Path::new(&core_crate_dir).join("Cargo.toml");
37-
let mut cargo_toml_content = String::new();
38-
if let Ok(mut file) = fs::File::open(&cargo_toml_path) {
39-
file.read_to_string(&mut cargo_toml_content).unwrap();
40-
}
38+
let cargo_toml_content =
39+
fs::read_to_string(&cargo_toml_path).expect("core Cargo.toml must be readable");
40+
let cargo_manifest: toml::Value =
41+
toml::from_str(&cargo_toml_content).expect("core Cargo.toml must be valid TOML");
42+
let dependencies = manifest_table(&cargo_manifest, "dependencies");
43+
let dev_dependencies = manifest_table(&cargo_manifest, "dev-dependencies");
44+
let features = manifest_table(&cargo_manifest, "features");
4145

4246
if let Ok(entries) = fs::read_dir(datafusion_dir) {
4347
for entry in entries.flatten() {
@@ -53,11 +57,9 @@ fn main() {
5357
continue;
5458
}
5559

56-
// Skip if not a dependency in Cargo.toml
57-
// This is a rough check
58-
if !cargo_toml_content.contains(&format!("{crate_name} ="))
59-
&& !cargo_toml_content.contains(&format!("\"{crate_name}\""))
60-
{
60+
let is_dependency = dependencies.contains_key(&crate_name)
61+
|| dev_dependencies.contains_key(&crate_name);
62+
if !is_dependency {
6163
continue;
6264
}
6365

@@ -81,12 +83,9 @@ fn main() {
8183
for krate in crates_with_metrics {
8284
let krate_snake = krate.replace("-", "_");
8385

84-
let is_optional = cargo_toml_content
85-
.lines()
86-
.any(|line| line.contains(&krate) && line.contains("optional = true"));
87-
88-
if is_optional {
89-
writeln!(f, " #[cfg(feature = \"{krate_snake}\")]").unwrap();
86+
if is_optional_dependency(&dependencies, &krate) {
87+
let feature = dependency_feature(&features, &krate);
88+
writeln!(f, " #[cfg(feature = \"{feature}\")]").unwrap();
9089
}
9190

9291
writeln!(f, " {{").unwrap();
@@ -100,6 +99,39 @@ fn main() {
10099
println!("cargo:rerun-if-changed=Cargo.toml");
101100
}
102101

102+
fn manifest_table(manifest: &toml::Value, key: &str) -> Table {
103+
manifest
104+
.get(key)
105+
.and_then(|value| value.as_table())
106+
.cloned()
107+
.unwrap_or_default()
108+
}
109+
110+
fn is_optional_dependency(dependencies: &Table, crate_name: &str) -> bool {
111+
dependencies
112+
.get(crate_name)
113+
.and_then(|dep| dep.as_table())
114+
.and_then(|dep| dep.get("optional"))
115+
.and_then(|value| value.as_bool())
116+
.unwrap_or(false)
117+
}
118+
119+
fn dependency_feature(features: &Table, crate_name: &str) -> String {
120+
let dep_feature = format!("dep:{crate_name}");
121+
for (feature, deps) in features {
122+
if let Some(array) = deps.as_array() {
123+
for dep in array {
124+
if let Some(dep_str) = dep.as_str() {
125+
if dep_str == crate_name || dep_str == dep_feature {
126+
return feature.clone();
127+
}
128+
}
129+
}
130+
}
131+
}
132+
crate_name.to_string()
133+
}
134+
103135
fn has_metric_doc(dir: &Path) -> bool {
104136
if let Ok(entries) = fs::read_dir(dir) {
105137
for entry in entries.flatten() {

datafusion/core/src/bin/print_metric_docs.rs

Lines changed: 9 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
//! Print metrics documentation collected via `DocumentedMetrics`/`DocumentedExec`.
1919
//! Called from doc generation scripts to refresh `docs/source/user-guide/metrics.md`.
2020
21-
use std::collections::HashSet;
22-
2321
use datafusion_expr::metric_doc_sections::{
2422
ExecDoc, MetricDoc, MetricDocPosition, exec_docs, metric_docs,
2523
};
@@ -40,32 +38,25 @@ fn main() -> std::io::Result<()> {
4038
.filter(|m| m.position == MetricDocPosition::Common)
4139
.collect();
4240

43-
// Collect names of common metric types for filtering embedded fields
44-
let common_metric_names: HashSet<&str> = common.iter().map(|m| m.name).collect();
45-
4641
if !common.is_empty() {
4742
content.push_str("## Common Metrics\n\n");
4843
for metric in common {
49-
render_metric_doc(&mut content, metric, 3, &common_metric_names);
44+
render_metric_doc(&mut content, metric, 3);
5045
}
5146
}
5247

5348
if !execs.is_empty() {
5449
content.push_str("## Operator-specific Metrics\n\n");
5550
for exec in execs {
56-
render_exec_doc(&mut content, exec, &common_metric_names);
51+
render_exec_doc(&mut content, exec);
5752
}
5853
}
5954

6055
println!("{content}");
6156
Ok(())
6257
}
6358

64-
fn render_exec_doc(
65-
out: &mut String,
66-
exec: &ExecDoc,
67-
common_metric_names: &HashSet<&str>,
68-
) {
59+
fn render_exec_doc(out: &mut String, exec: &ExecDoc) {
6960
out.push_str(&heading(3, exec.name));
7061
out.push_str("\n\n");
7162

@@ -86,20 +77,17 @@ fn render_exec_doc(
8677
metrics.sort_by(|a, b| a.name.cmp(b.name));
8778

8879
if metrics.is_empty() {
89-
out.push_str("_No operator-specific metrics documented._\n\n");
80+
out.push_str(
81+
"_No operator-specific metrics documented (see Common Metrics)._\n\n",
82+
);
9083
} else {
9184
for metric in metrics {
92-
render_metric_doc(out, metric, 4, common_metric_names);
85+
render_metric_doc(out, metric, 4);
9386
}
9487
}
9588
}
9689

97-
fn render_metric_doc(
98-
out: &mut String,
99-
metric: &MetricDoc,
100-
heading_level: usize,
101-
common_metric_names: &HashSet<&str>,
102-
) {
90+
fn render_metric_doc(out: &mut String, metric: &MetricDoc, heading_level: usize) {
10391
out.push_str(&heading(heading_level, metric.name));
10492
out.push_str("\n\n");
10593

@@ -110,21 +98,9 @@ fn render_metric_doc(
11098
out.push_str("\n\n");
11199
}
112100

113-
// Filter out fields whose type is a common metric (documented separately)
114-
let fields: Vec<_> = metric
115-
.fields
116-
.iter()
117-
.filter(|field| !common_metric_names.contains(field.type_name))
118-
.collect();
119-
120-
if fields.is_empty() {
121-
out.push_str("_No metrics documented._\n\n");
122-
return;
123-
}
124-
125101
out.push_str("| Metric | Description |\n");
126102
out.push_str("| --- | --- |\n");
127-
for field in fields {
103+
for field in metric.fields {
128104
out.push_str(&format!("| {} | {} |\n", field.name, sanitize(field.doc)));
129105
}
130106
out.push('\n');

datafusion/datasource-parquet/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,11 @@ bytes = { workspace = true }
3737
datafusion-common = { workspace = true, features = ["object_store", "parquet"] }
3838
datafusion-common-runtime = { workspace = true }
3939
datafusion-datasource = { workspace = true }
40+
datafusion-doc = { workspace = true }
4041
datafusion-execution = { workspace = true }
4142
datafusion-expr = { workspace = true }
4243
datafusion-functions-aggregate-common = { workspace = true }
44+
datafusion-macros = { workspace = true }
4345
datafusion-physical-expr = { workspace = true }
4446
datafusion-physical-expr-adapter = { workspace = true }
4547
datafusion-physical-expr-common = { workspace = true }

datafusion/datasource-parquet/src/metrics.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use datafusion_macros::metric_doc;
1819
use datafusion_physical_plan::metrics::{
1920
Count, ExecutionPlanMetricsSet, MetricBuilder, MetricType, PruningMetrics,
2021
RatioMergeStrategy, RatioMetrics, Time,
@@ -26,6 +27,7 @@ use datafusion_physical_plan::metrics::{
2627
/// through [`ParquetFileReaderFactory`].
2728
///
2829
/// [`ParquetFileReaderFactory`]: super::ParquetFileReaderFactory
30+
#[metric_doc]
2931
#[derive(Debug, Clone)]
3032
pub struct ParquetFileMetrics {
3133
/// Number of file **ranges** pruned or matched by partition or file level statistics.

datafusion/datasource/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,10 @@ bzip2 = { workspace = true, optional = true }
4949
chrono = { workspace = true }
5050
datafusion-common = { workspace = true, features = ["object_store"] }
5151
datafusion-common-runtime = { workspace = true }
52+
datafusion-doc = { workspace = true }
5253
datafusion-execution = { workspace = true }
5354
datafusion-expr = { workspace = true }
55+
datafusion-macros = { workspace = true }
5456
datafusion-physical-expr = { workspace = true }
5557
datafusion-physical-expr-adapter = { workspace = true }
5658
datafusion-physical-expr-common = { workspace = true }

datafusion/datasource/src/file_stream.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::file_scan_config::FileScanConfig;
3232
use arrow::datatypes::SchemaRef;
3333
use datafusion_common::error::Result;
3434
use datafusion_execution::RecordBatchStream;
35+
use datafusion_macros::metric_doc;
3536
use datafusion_physical_plan::metrics::{
3637
BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, Time,
3738
};
@@ -360,6 +361,7 @@ impl StartableTime {
360361
/// as other operators.
361362
///
362363
/// [`FileStream`]: <https://github.com/apache/datafusion/blob/main/datafusion/datasource/src/file_stream.rs>
364+
#[metric_doc]
363365
pub struct FileStreamMetrics {
364366
/// Wall clock time elapsed for file opening.
365367
///

datafusion/datasource/src/source.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ use datafusion_physical_plan::{
3636
use itertools::Itertools;
3737

3838
use crate::file_scan_config::FileScanConfig;
39+
use crate::file_stream::FileStreamMetrics;
3940
use datafusion_common::config::ConfigOptions;
4041
use datafusion_common::{Constraints, Result, Statistics};
4142
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
43+
use datafusion_macros::metric_doc;
4244
use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr};
4345
use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr};
4446
use datafusion_physical_plan::SortOrderPushdownResult;
@@ -224,6 +226,7 @@ pub trait DataSource: Send + Sync + Debug {
224226
/// the [`FileSource`] trait.
225227
///
226228
/// [`FileSource`]: crate::file::FileSource
229+
#[metric_doc(FileStreamMetrics)]
227230
#[derive(Clone, Debug)]
228231
pub struct DataSourceExec {
229232
/// The source of the data -- for example, `FileScanConfig` or `MemorySourceConfig`

datafusion/execution/src/metrics/baseline.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ impl Drop for BaselineMetrics {
178178

179179
/// Helper for creating and tracking spill-related metrics for
180180
/// each operator
181+
#[metric_doc(common)]
181182
#[derive(Debug, Clone)]
182183
pub struct SpillMetrics {
183184
/// count of spills during the execution of the operator
@@ -202,6 +203,7 @@ impl SpillMetrics {
202203
}
203204

204205
/// Metrics for tracking batch splitting activity
206+
#[metric_doc(common)]
205207
#[derive(Debug, Clone)]
206208
pub struct SplitMetrics {
207209
/// Number of times an input [`RecordBatch`] was split

0 commit comments

Comments
 (0)