Skip to content

Commit 7eb039b

Browse files
committed
docs: auto generate metrics documentation
1 parent 50d20dd commit 7eb039b

21 files changed

Lines changed: 789 additions & 102 deletions

File tree

.github/workflows/rust.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,11 @@ jobs:
708708
# If you encounter an error, run './dev/update_function_docs.sh' and commit
709709
./dev/update_function_docs.sh
710710
git diff --exit-code
711+
- name: Check if metrics.md has been modified
712+
run: |
713+
# If you encounter an error, run './dev/update_metric_docs.sh' and commit
714+
./dev/update_metric_docs.sh
715+
git diff --exit-code
711716
712717
# Verify MSRV for the crates which are directly used by other projects:
713718
# - datafusion

Cargo.lock

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ datafusion-datasource-parquet = { workspace = true, optional = true }
129129
datafusion-execution = { workspace = true }
130130
datafusion-expr = { workspace = true, default-features = false }
131131
datafusion-expr-common = { workspace = true }
132+
datafusion-doc = { workspace = true }
132133
datafusion-functions = { workspace = true }
133134
datafusion-functions-aggregate = { workspace = true }
134135
datafusion-functions-nested = { workspace = true, default-features = false, optional = true }
@@ -167,7 +168,6 @@ ctor = { workspace = true }
167168
dashmap = "6.1.0"
168169
datafusion-doc = { workspace = true }
169170
datafusion-functions-window-common = { workspace = true }
170-
datafusion-macros = { workspace = true }
171171
datafusion-physical-optimizer = { workspace = true }
172172
doc-comment = { workspace = true }
173173
env_logger = { workspace = true }
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Print metrics documentation collected via `DocumentedMetrics`/`DocumentedExec`.
19+
//! Called from doc generation scripts to refresh `docs/source/user-guide/metrics.md`.
20+
21+
use std::collections::HashSet;
22+
23+
use datafusion_doc::metric_doc_sections::{
24+
ExecDoc, MetricDoc, MetricDocPosition, exec_docs, metric_docs,
25+
};
26+
use datafusion_execution as _; // Link metrics defined in execution crate.
27+
use datafusion_physical_plan as _; // Link metrics and execs defined in physical plan.
28+
29+
fn main() -> std::io::Result<()> {
30+
let mut content = String::new();
31+
let mut metrics: Vec<&MetricDoc> = metric_docs().collect();
32+
metrics.sort_by(|a, b| a.name.cmp(b.name));
33+
34+
let mut execs: Vec<&ExecDoc> = exec_docs().collect();
35+
execs.sort_by(|a, b| a.name.cmp(b.name));
36+
37+
let common: Vec<&MetricDoc> = metrics
38+
.iter()
39+
.copied()
40+
.filter(|m| m.position == MetricDocPosition::Common)
41+
.collect();
42+
43+
// Collect names of common metric types for filtering embedded fields
44+
let common_metric_names: HashSet<&str> = common.iter().map(|m| m.name).collect();
45+
46+
if !common.is_empty() {
47+
content.push_str("## Common Metrics\n\n");
48+
for metric in common {
49+
render_metric_doc(&mut content, metric, 3, &common_metric_names);
50+
}
51+
}
52+
53+
if !execs.is_empty() {
54+
content.push_str("## Operator-specific Metrics\n\n");
55+
for exec in execs {
56+
render_exec_doc(&mut content, exec, &common_metric_names);
57+
}
58+
}
59+
60+
println!("{content}");
61+
Ok(())
62+
}
63+
64+
fn render_exec_doc(
65+
out: &mut String,
66+
exec: &ExecDoc,
67+
common_metric_names: &HashSet<&str>,
68+
) {
69+
out.push_str(&heading(3, exec.name));
70+
out.push_str("\n\n");
71+
72+
if let Some(doc) = summarize(exec.doc) {
73+
if !doc.is_empty() {
74+
out.push_str(&sanitize(doc));
75+
out.push_str("\n\n");
76+
}
77+
}
78+
79+
// Filter to operator-specific metrics only (common metrics are documented separately)
80+
let mut metrics: Vec<&MetricDoc> = exec
81+
.metrics
82+
.iter()
83+
.copied()
84+
.filter(|metric| metric.position != MetricDocPosition::Common)
85+
.collect();
86+
metrics.sort_by(|a, b| a.name.cmp(b.name));
87+
88+
if metrics.is_empty() {
89+
out.push_str("_No operator-specific metrics documented._\n\n");
90+
} else {
91+
for metric in metrics {
92+
render_metric_doc(out, metric, 4, common_metric_names);
93+
}
94+
}
95+
}
96+
97+
fn render_metric_doc(
98+
out: &mut String,
99+
metric: &MetricDoc,
100+
heading_level: usize,
101+
common_metric_names: &HashSet<&str>,
102+
) {
103+
out.push_str(&heading(heading_level, metric.name));
104+
out.push_str("\n\n");
105+
106+
if let Some(doc) = summarize(metric.doc) {
107+
if !doc.is_empty() {
108+
out.push_str(&sanitize(doc));
109+
out.push_str("\n\n");
110+
}
111+
}
112+
113+
// Filter out fields whose type is a common metric (documented separately)
114+
let fields: Vec<_> = metric
115+
.fields
116+
.iter()
117+
.filter(|field| !common_metric_names.contains(field.type_name))
118+
.collect();
119+
120+
if fields.is_empty() {
121+
out.push_str("_No metrics documented._\n\n");
122+
return;
123+
}
124+
125+
out.push_str("| Metric | Description |\n");
126+
out.push_str("| --- | --- |\n");
127+
for field in fields {
128+
out.push_str(&format!("| {} | {} |\n", field.name, sanitize(field.doc)));
129+
}
130+
out.push('\n');
131+
}
132+
133+
/// Builds a Markdown heading line: `level` `#` characters, one space, then `title`.
///
/// No trailing newline is added.
fn heading(level: usize, title: &str) -> String {
    let mut line = String::with_capacity(level + 1 + title.len());
    line.extend(std::iter::repeat('#').take(level));
    line.push(' ');
    line.push_str(title);
    line
}
136+
137+
/// Returns the first paragraph of `doc`, or `None` when `doc` is blank.
///
/// The input is trimmed first. The paragraph ends at the first blank line,
/// where "blank" means empty or whitespace-only, so `"\n\n"`, `"\n \n"` and
/// `"\r\n\r\n"` all count as paragraph breaks. The returned slice borrows from
/// `doc` and has no leading or trailing whitespace; line breaks inside the
/// paragraph are kept.
fn summarize(doc: &str) -> Option<&str> {
    let trimmed = doc.trim();
    if trimmed.is_empty() {
        return None;
    }

    // Walk the text line by line (terminators included, so the running offset
    // stays a valid byte index) and cut at the first blank line.
    let mut end = trimmed.len();
    let mut offset = 0;
    for line in trimmed.split_inclusive('\n') {
        if line.trim().is_empty() {
            end = offset;
            break;
        }
        offset += line.len();
    }

    Some(trimmed[..end].trim_end())
}
144+
145+
/// Flattens `doc` onto a single line.
///
/// Every run of whitespace (including newlines) becomes one space, and leading
/// and trailing whitespace is removed.
fn sanitize(doc: &str) -> String {
    let mut flat = String::with_capacity(doc.len());
    for word in doc.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first one.
        if !flat.is_empty() {
            flat.push(' ');
        }
        flat.push_str(word);
    }
    flat
}

datafusion/doc/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ license = { workspace = true }
2828
authors = { workspace = true }
2929
rust-version = { workspace = true }
3030

31+
[dependencies]
32+
inventory = "0.3.15"
33+
3134
[package.metadata.docs.rs]
3235
all-features = true
3336

datafusion/doc/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323
)]
2424
#![cfg_attr(docsrs, feature(doc_cfg))]
2525

26+
mod metrics;
2627
mod udaf;
2728
mod udf;
2829
mod udwf;
2930

31+
pub use metrics::metric_doc_sections;
3032
pub use udaf::aggregate_doc_sections;
3133
pub use udf::scalar_doc_sections;
3234
pub use udwf::window_doc_sections;

datafusion/doc/src/metrics.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Documentation structures for execution metrics and operators.
19+
20+
/// Groupings and exports for metrics documentation (mirrors how function doc sections are exposed).
///
/// Re-exports every public item defined in this file behind a single path,
/// together with the `inventory` crate itself.
21+
pub mod metric_doc_sections {
22+
pub use super::{
23+
DocumentedExec, DocumentedMetrics, ExecDoc, ExecDocEntry, MetricDoc,
24+
MetricDocEntry, MetricDocPosition, MetricFieldDoc, exec_docs, metric_docs,
25+
};
26+
// NOTE(review): presumably re-exported so registration code in other crates can
// reach `inventory` through this module without depending on it directly — confirm.
pub use inventory;
27+
}
28+
29+
/// Whether a metrics struct should be documented as common or operator-specific.
30+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
31+
pub enum MetricDocPosition {
32+
/// Metrics that are reused across operators (for example `BaselineMetrics`).
33+
Common,
34+
/// Metrics that are tied to a specific operator.
35+
Operator,
36+
}
37+
38+
/// Documentation for a single metric field.
///
/// All members are `'static` strings, so values can live in constants.
39+
#[derive(Debug)]
40+
pub struct MetricFieldDoc {
41+
/// Name of the metric.
42+
pub name: &'static str,
43+
/// Documentation for the metric.
44+
pub doc: &'static str,
45+
/// Type name of the metric field.
///
/// The documentation generator compares this against the names of common
/// metrics structs to decide whether the field is listed.
46+
pub type_name: &'static str,
47+
}
48+
49+
/// Documentation attached to a metrics struct.
///
/// Exposed through [`DocumentedMetrics::DOC`] and enumerated with [`metric_docs`].
50+
#[derive(Debug)]
51+
pub struct MetricDoc {
52+
/// Name of the metrics struct (usually ends with `Metrics`).
53+
pub name: &'static str,
54+
/// Documentation from the struct-level doc comment.
55+
pub doc: &'static str,
56+
/// Documentation for each metric field.
57+
pub fields: &'static [MetricFieldDoc],
58+
/// Whether the metrics are common or operator-specific.
59+
pub position: MetricDocPosition,
60+
}
61+
62+
/// Documentation for an execution plan implementation.
///
/// Returned by [`DocumentedExec::exec_doc`] and enumerated with [`exec_docs`].
63+
#[derive(Debug)]
64+
pub struct ExecDoc {
65+
/// Name of the execution plan struct (usually ends with `Exec`).
66+
pub name: &'static str,
67+
/// Documentation from the struct-level doc comment.
68+
pub doc: &'static str,
69+
/// Metrics exposed by this operator.
///
/// Each entry is a reference to the [`MetricDoc`] of one metrics struct.
70+
pub metrics: &'static [&'static MetricDoc],
71+
}
72+
73+
/// Trait implemented for metrics structs to expose their documentation.
///
/// Implementors only need to provide [`Self::DOC`]; [`Self::metric_doc`] has a
/// default body that returns it.
74+
pub trait DocumentedMetrics {
75+
/// Static documentation for this metrics struct.
76+
const DOC: &'static MetricDoc;
77+
78+
/// Returns the documentation for this metrics struct.
///
/// The default implementation returns [`Self::DOC`].
79+
fn metric_doc() -> &'static MetricDoc {
80+
Self::DOC
81+
}
82+
}
83+
84+
/// Trait implemented for execution plan structs to expose their documentation.
///
/// Unlike [`DocumentedMetrics`], this trait has no associated constant and no
/// default method body: implementors must supply `exec_doc` themselves.
85+
pub trait DocumentedExec {
86+
/// Returns the documentation for this operator.
87+
fn exec_doc() -> &'static ExecDoc;
88+
}
89+
90+
/// Registry entry wrapping a reference to a [`MetricDoc`].
///
/// This is the type registered with `inventory::collect!` in this file and
/// iterated by [`metric_docs`].
#[derive(Debug)]
91+
pub struct MetricDocEntry(pub &'static MetricDoc);
92+
93+
/// Registry entry wrapping a reference to an [`ExecDoc`].
///
/// This is the type registered with `inventory::collect!` in this file and
/// iterated by [`exec_docs`].
#[derive(Debug)]
94+
pub struct ExecDocEntry(pub &'static ExecDoc);
95+
96+
/// Iterate over all registered metrics docs.
97+
pub fn metric_docs() -> impl Iterator<Item = &'static MetricDoc> {
98+
inventory::iter::<MetricDocEntry>
99+
.into_iter()
100+
.map(|entry| entry.0)
101+
}
102+
103+
/// Iterate over all registered execution plan docs.
104+
pub fn exec_docs() -> impl Iterator<Item = &'static ExecDoc> {
105+
inventory::iter::<ExecDocEntry>
106+
.into_iter()
107+
.map(|entry| entry.0)
108+
}
109+
110+
// Declare the two entry types as `inventory` collections; `metric_docs` and
// `exec_docs` iterate over whatever has been registered for them.
inventory::collect!(MetricDocEntry);
111+
inventory::collect!(ExecDocEntry);

datafusion/execution/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ async-trait = { workspace = true }
5454
chrono = { workspace = true }
5555
dashmap = { workspace = true }
5656
datafusion-common = { workspace = true, default-features = false }
57+
datafusion-doc = { workspace = true }
5758
datafusion-expr = { workspace = true, default-features = false }
59+
datafusion-macros = { workspace = true }
5860
futures = { workspace = true }
5961
log = { workspace = true }
6062
object_store = { workspace = true, features = ["fs"] }

datafusion/execution/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub mod registry {
4646
};
4747
}
4848

49+
pub use datafusion_doc::metric_doc_sections;
4950
pub use disk_manager::DiskManager;
5051
pub use registry::FunctionRegistry;
5152
pub use stream::{RecordBatchStream, SendableRecordBatchStream};

0 commit comments

Comments
 (0)