Skip to content

Commit aa24505

Browse files
Cleanup
1 parent 185d136 commit aa24505

9 files changed

Lines changed: 676 additions & 155 deletions

File tree

.claude/settings.local.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
"Bash(/Users/matth/OpenSource/datafusion-tui/target/debug/dft:*)",
3333
"Bash(./target/debug/dft:*)",
3434
"Bash(xargs rg:*)",
35-
"Bash(rg:*)"
35+
"Bash(rg:*)",
36+
"Bash(pkill:*)"
3637
],
3738
"deny": [],
3839
"ask": []

crates/datafusion-app/src/flightsql.rs

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -473,42 +473,39 @@ impl FlightSQLContext {
473473
}
474474
}
475475

476-
/// Get raw metrics batches without reconstruction (for --analyze-raw)
476+
/// Get raw metrics batch without reconstruction (for --analyze-raw)
477477
pub async fn analyze_query_raw(
478478
&self,
479479
query: &str,
480-
) -> Result<(datafusion::arrow::array::RecordBatch, datafusion::arrow::array::RecordBatch)> {
480+
) -> Result<(String, datafusion::arrow::array::RecordBatch)> {
481481
self.fetch_analyze_batches(query).await
482482
}
483483

484484
/// Reconstruct ExecutionStats from metrics (for --analyze)
485485
pub async fn analyze_query(&self, query: &str) -> Result<crate::stats::ExecutionStats> {
486-
use datafusion::arrow::array::StringArray;
487-
488-
let (queries_batch, metrics_batch) = self.fetch_analyze_batches(query).await?;
489-
490-
// Extract query string from queries batch
491-
let query_array = queries_batch
492-
.column(0)
493-
.as_any()
494-
.downcast_ref::<StringArray>()
495-
.ok_or_else(|| eyre::eyre!("Invalid queries batch schema"))?;
496-
let query_str = query_array.value(0).to_string();
486+
let (query_str, metrics_batch) = self.fetch_analyze_batches(query).await?;
497487

498488
// Reconstruct ExecutionStats from metrics table
499489
let stats = crate::stats::ExecutionStats::from_metrics_table(metrics_batch, query_str)?;
500490

501491
Ok(stats)
502492
}
503493

504-
/// Shared logic to fetch analyze batches from server
494+
/// Shared logic to fetch analyze batch and query from server
505495
async fn fetch_analyze_batches(
506496
&self,
507497
query: &str,
508-
) -> Result<(datafusion::arrow::array::RecordBatch, datafusion::arrow::array::RecordBatch)> {
498+
) -> Result<(String, datafusion::arrow::array::RecordBatch)> {
509499
use arrow_flight::utils::flight_data_to_batches;
510500
use arrow_flight::{Action, FlightData};
511501

502+
// Validate that query contains only a single statement
503+
let dialect = datafusion::sql::sqlparser::dialect::GenericDialect {};
504+
let statements = DFParser::parse_sql_with_dialect(query, &dialect)?;
505+
if statements.len() != 1 {
506+
return Err(eyre::eyre!("Only a single SQL statement can be analyzed"));
507+
}
508+
512509
// 1. Create Action with type "analyze_query" and SQL in body
513510
let action = Action {
514511
r#type: "analyze_query".to_string(),
@@ -533,35 +530,46 @@ impl FlightSQLContext {
533530
result_messages.push(result);
534531
}
535532

536-
// 4. Decode each Result message to FlightData
533+
// 4. Decode each Result message to FlightData and extract query from metadata
537534
let mut all_flight_data = Vec::new();
535+
let mut sql_query = None;
538536

539537
for result in result_messages {
540538
// Deserialize the FlightData from the Result.body bytes using prost
541-
// Note: FlightData implements prost::Message
542539
let flight_data = <FlightData as prost::Message>::decode(result.body.as_ref())
543540
.map_err(|e| eyre::eyre!("Failed to decode FlightData: {}", e))?;
544541

542+
// Extract SQL from schema message (first message) metadata
543+
if sql_query.is_none() && !flight_data.app_metadata.is_empty() {
544+
sql_query = Some(
545+
String::from_utf8(flight_data.app_metadata.to_vec())
546+
.map_err(|e| eyre::eyre!("Invalid UTF-8 in metadata: {}", e))?,
547+
);
548+
}
549+
545550
all_flight_data.push(flight_data);
546551
}
547552

548-
// 5. Convert all FlightData to RecordBatches using flight_data_to_batches
549-
let batches = flight_data_to_batches(&all_flight_data)
550-
.map_err(|e| eyre::eyre!("Failed to decode batches: {}", e))?;
551-
552-
// 6. Split batches - first set is queries (1 batch with schema), second set is metrics
553-
// The batches_to_flight_data function generates: [schema, data batch] for each table
554-
// So we expect: [queries_schema, queries_batch, metrics_schema, metrics_batch]
555-
// But flight_data_to_batches returns just the data batches, not schemas
556-
// So we expect: [queries_batch, metrics_batch]
553+
// 5. Validate we got the SQL query in metadata
554+
let query_str = sql_query
555+
.ok_or_else(|| eyre::eyre!("SQL query not found in response metadata"))?;
557556

558-
if batches.len() < 2 {
557+
// 6. Decode metrics batch
558+
// batches_to_flight_data creates [schema, data] for the batch
559+
if all_flight_data.len() < 2 {
559560
return Err(eyre::eyre!(
560-
"Invalid analyze response: expected at least 2 batches, got {}",
561-
batches.len()
561+
"Invalid analyze response: expected at least 2 FlightData messages (schema + data), got {}",
562+
all_flight_data.len()
562563
));
563564
}
564565

565-
Ok((batches[0].clone(), batches[1].clone()))
566+
let metrics_batches = flight_data_to_batches(&all_flight_data)
567+
.map_err(|e| eyre::eyre!("Failed to decode metrics batch: {}", e))?;
568+
569+
if metrics_batches.is_empty() {
570+
return Err(eyre::eyre!("No metrics batch found in response"));
571+
}
572+
573+
Ok((query_str, metrics_batches[0].clone()))
566574
}
567575
}

crates/datafusion-app/src/stats.rs

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -450,39 +450,42 @@ impl ExecutionComputeStats {
450450
compute: &Option<Vec<PartitionsComputeStats>>,
451451
label: &str,
452452
) -> std::fmt::Result {
453-
if let (Some(filter_compute), Some(elapsed_compute)) = (compute, &self.elapsed_compute) {
454-
let partitions = filter_compute.iter().fold(0, |acc, c| acc + c.partitions());
455-
writeln!(
456-
f,
457-
"{label} Stats ({} nodes, {} partitions)",
458-
filter_compute.len(),
459-
partitions
460-
)?;
461-
writeln!(
462-
f,
463-
"{:<30} {:<16} {:<16} {:<16} {:<16} {:<16}",
464-
"Node(Partitions)", "Min", "Median", "Mean", "Max", "Total (%)"
465-
)?;
466-
filter_compute.iter().try_for_each(|node| {
467-
let (min, median, mean, max, total) = node.summary_stats();
468-
let total = format!(
469-
"{} ({:.2}%)",
470-
total,
471-
(total as f32 / *elapsed_compute as f32) * 100.0
472-
);
453+
match (compute, &self.elapsed_compute) {
454+
(Some(filter_compute), Some(elapsed_compute)) if !filter_compute.is_empty() => {
455+
let partitions = filter_compute.iter().fold(0, |acc, c| acc + c.partitions());
456+
writeln!(
457+
f,
458+
"{label}: {} nodes, {} partitions",
459+
filter_compute.len(),
460+
partitions
461+
)?;
473462
writeln!(
474463
f,
475464
"{:<30} {:<16} {:<16} {:<16} {:<16} {:<16}",
476-
format!("{}({})", node.name, node.elapsed_computes.len()),
477-
min,
478-
median,
479-
mean,
480-
max,
481-
total,
482-
)
483-
})
484-
} else {
485-
writeln!(f, "No {label} Stats")
465+
"Node(Partitions)", "Min", "Median", "Mean", "Max", "Total (%)"
466+
)?;
467+
filter_compute.iter().try_for_each(|node| {
468+
let (min, median, mean, max, total) = node.summary_stats();
469+
let total = format!(
470+
"{} ({:.2}%)",
471+
total,
472+
(total as f32 / *elapsed_compute as f32) * 100.0
473+
);
474+
writeln!(
475+
f,
476+
"{:<30} {:<16} {:<16} {:<16} {:<16} {:<16}",
477+
format!("{}({})", node.name, node.elapsed_computes.len()),
478+
min,
479+
median,
480+
mean,
481+
max,
482+
total,
483+
)
484+
})
485+
}
486+
_ => {
487+
writeln!(f, "{label}: No data")
488+
}
486489
}
487490
}
488491
}
@@ -503,16 +506,19 @@ impl std::fmt::Display for ExecutionComputeStats {
503506
.unwrap_or("None".to_string()),
504507
)?;
505508
writeln!(f)?;
509+
510+
// Always display all categories in the same order as FlightSQL protocol:
511+
// Projection, Filter, Sort, Aggregate, Join, Other
506512
self.display_compute(f, &self.projection_compute, "Projection")?;
507513
writeln!(f)?;
508514
self.display_compute(f, &self.filter_compute, "Filter")?;
509515
writeln!(f)?;
510516
self.display_compute(f, &self.sort_compute, "Sort")?;
511517
writeln!(f)?;
512-
self.display_compute(f, &self.join_compute, "Join")?;
513-
writeln!(f)?;
514518
self.display_compute(f, &self.aggregate_compute, "Aggregate")?;
515519
writeln!(f)?;
520+
self.display_compute(f, &self.join_compute, "Join")?;
521+
writeln!(f)?;
516522
self.display_compute(f, &self.other_compute, "Other")?;
517523
writeln!(f)
518524
}

docs/cli.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ The output from `EXPLAIN ANALYZE` provides a wealth of information on a queries
133133

134134
To help with this, the `--analyze` flag can be used to generate a summary of the underlying `ExecutionPlan` `MetricSet`s. The summary presents the information in a way that is hopefully easier to understand and easier to draw conclusions on a query's performance.
135135

136+
**Important**: The analyze feature only supports a single SQL statement. If you provide multiple statements (e.g., separated by semicolons) or multiple files/commands, an error will be returned.
137+
136138
This feature is still in its early stages and is expected to evolve. Once it has gone through enough real-world testing and it has been confirmed that the metrics make sense, documentation will be added on the exact calculations — until then the source will need to be inspected to see the calculations.
137139

138140
### Local Analyze

docs/flightsql_analyze_protocol.md

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ The FlightSQL Analyze Protocol enables clients to retrieve detailed execution me
2727

2828
**Request Body**: UTF-8 encoded SQL query string
2929

30+
**Important**: The SQL query must contain exactly one SQL statement. Multiple statements (e.g., separated by semicolons) are not supported and will result in an error.
31+
3032
**Example**:
3133
```rust
3234
Action {
@@ -39,29 +41,20 @@ Action {
3941

4042
### Response Format
4143

42-
The response is a stream of `arrow_flight::Result` messages. Each `Result.body` contains serialized `FlightData` messages. The stream provides two Arrow RecordBatches:
43-
44-
#### Batch 1: Queries Batch
44+
The response is a stream of `arrow_flight::Result` messages. Each `Result.body` contains serialized `FlightData` messages.
4545

46-
**Purpose**: Contains the analyzed query text
46+
#### Response Metadata
4747

48-
**Schema**:
49-
| Column | Type | Nullable | Description |
50-
|--------|------|----------|-------------|
51-
| query | Utf8 | false | The SQL query that was analyzed |
48+
The first `FlightData` message (schema message) contains the query text in its metadata:
5249

53-
**Cardinality**: Exactly 1 row
50+
**Metadata Key**: `"sql_query"`
51+
**Metadata Value**: UTF-8 encoded SQL query string
5452

55-
**Example**:
56-
```
57-
query
58-
--------------------------------------------------
59-
SELECT * FROM table WHERE id > 100
60-
```
53+
This allows the client to correlate the metrics with the original query without requiring a separate record batch.
6154

62-
#### Batch 2: Metrics Batch
55+
#### Metrics Batch
6356

64-
**Purpose**: Flat table where each row represents a single metric
57+
**Purpose**: Single Arrow RecordBatch containing a flat table where each row represents a single metric
6558

6659
**Schema**:
6760
| Column | Type | Nullable | Description |
@@ -183,11 +176,10 @@ Detailed breakdown by operator and partition:
183176

184177
## Example Response
185178

186-
### Queries Batch
179+
### Response Metadata
187180
```
188-
query
189-
--------------------------------------------------
190-
SELECT * FROM table WHERE id > 100
181+
Metadata in schema FlightData message:
182+
sql_query: "SELECT * FROM table WHERE id > 100"
191183
```
192184

193185
### Metrics Batch
@@ -243,8 +235,9 @@ To implement this protocol in a FlightSQL server:
243235
- Emit one row per metric value
244236

245237
5. **Build Response**
246-
- Create two RecordBatches (queries + metrics)
238+
- Create metrics RecordBatch
247239
- Encode as FlightData using `batches_to_flight_data()` or equivalent
240+
- Add SQL query to the schema FlightData message metadata with key `"sql_query"`
248241
- Serialize each FlightData to bytes (protobuf encoding)
249242
- Wrap each serialized FlightData in `arrow_flight::Result { body: bytes }`
250243
- Stream Result messages to client
@@ -261,18 +254,20 @@ async fn do_action_fallback(&self, request: Request<Action>) -> Result<Response<
261254
// 2. Execute with metrics
262255
let stats = self.analyze_query(&sql).await?;
263256

264-
// 3. Convert to batches
265-
let queries_batch = create_queries_batch(vec![sql])?;
257+
// 3. Convert to metrics batch
266258
let metrics_batch = stats.to_metrics_table()?;
267259

268-
// 4. Encode as FlightData
269-
let queries_flight_data = batches_to_flight_data(&queries_batch.schema(), vec![queries_batch])?;
270-
let metrics_flight_data = batches_to_flight_data(&metrics_batch.schema(), vec![metrics_batch])?;
260+
// 4. Encode as FlightData with SQL in metadata
261+
let mut flight_data = batches_to_flight_data(&metrics_batch.schema(), vec![metrics_batch])?;
262+
263+
// Add SQL query to schema message metadata
264+
if let Some(schema_msg) = flight_data.first_mut() {
265+
schema_msg.app_metadata = sql.as_bytes().to_vec().into();
266+
}
271267

272268
// 5. Serialize and wrap in Result messages
273-
let results: Vec<arrow_flight::Result> = queries_flight_data
269+
let results: Vec<arrow_flight::Result> = flight_data
274270
.into_iter()
275-
.chain(metrics_flight_data.into_iter())
276271
.map(|fd| arrow_flight::Result { body: fd.encode_to_vec().into() })
277272
.collect();
278273

@@ -300,37 +295,46 @@ To consume this protocol:
300295
2. **Receive Stream**
301296
- Collect `arrow_flight::Result` messages from stream
302297

303-
3. **Decode FlightData**
298+
3. **Decode FlightData and Extract Metadata**
304299
```rust
305300
let mut flight_data_vec = Vec::new();
301+
let mut sql_query = None;
302+
306303
for result in result_messages {
307304
let flight_data = FlightData::decode(result.body.as_ref())?;
305+
306+
// Extract SQL from first message (schema) metadata
307+
if sql_query.is_none() && !flight_data.app_metadata.is_empty() {
308+
sql_query = Some(String::from_utf8(flight_data.app_metadata.to_vec())?);
309+
}
310+
308311
flight_data_vec.push(flight_data);
309312
}
310313
```
311314

312-
4. **Convert to RecordBatches**
315+
4. **Convert to RecordBatch**
313316
```rust
314317
let batches = flight_data_to_batches(&flight_data_vec)?;
315-
let queries_batch = batches[0].clone();
316-
let metrics_batch = batches[1].clone();
318+
let metrics_batch = batches[0].clone();
319+
let query_text = sql_query.expect("SQL query not found in metadata");
317320
```
318321

319-
5. **Extract Data**
320-
- Parse queries batch to get SQL text
322+
5. **Reconstruct Statistics**
323+
- Use query text from metadata
321324
- Parse metrics batch to reconstruct execution statistics
322325

323326
### Error Handling
324327

325328
**Server Errors**:
326329
- `Status::unimplemented` - Server doesn't support analyze protocol
327-
- `Status::invalid_argument` - Invalid SQL or malformed request
330+
- `Status::invalid_argument` - Invalid SQL, malformed request, or multiple SQL statements provided
328331
- `Status::internal` - Query execution or serialization failure
329332

330333
**Client Handling**:
331334
- Gracefully handle `unimplemented` with clear user message
332335
- Retry transient errors as appropriate
333-
- Validate response format (expect 2+ batches)
336+
- Validate response format (expect metadata with `sql_query` and at least one batch)
337+
- Handle missing metadata gracefully (older protocol versions)
334338

335339
## Extensibility
336340

0 commit comments

Comments
 (0)