datafusion-contrib
diff --git a/‎.claude/settings.local.json‎
Lines changed: 2 additions & 1 deletion b/‎.claude/settings.local.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎CLAUDE.md‎
Lines changed: 49 additions & 0 deletions b/‎CLAUDE.md‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 39 additions & 0 deletions b/‎README.md‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎crates/datafusion-app/src/flightsql.rs‎
Lines changed: 139 additions & 50 deletions b/‎crates/datafusion-app/src/flightsql.rs‎
Lines changed: 139 additions & 50 deletions
diff --git a/‎crates/datafusion-app/src/flightsql_benchmarks.rs‎
Lines changed: 7 additions & 1 deletion b/‎crates/datafusion-app/src/flightsql_benchmarks.rs‎
Lines changed: 7 additions & 1 deletion
@@ -28,7 +28,8 @@
       "Bash(ls:*)",
       "Bash(grep:*)",
       "Bash(cargo fmt:*)",
-      "Bash(./target/release/dft:*)"
+      "Bash(./target/release/dft:*)",
+      "Bash(/Users/matth/OpenSource/datafusion-tui/target/debug/dft:*)"
     ],
     "deny": [],
     "ask": []
 
@@ -33,6 +33,55 @@ cargo run --features=flightsql -- serve-flightsql
 cargo run -- generate-tpch
 ```
 
+### Benchmarking
+
+Benchmarks measure query performance with detailed timing breakdowns:
+
+```bash
+# Serial benchmark (default, 10 iterations)
+cargo run -- -c "SELECT 1" --bench
+
+# Custom iteration count
+cargo run -- -c "SELECT 1" --bench -n 100
+
+# Concurrent benchmark (measures throughput under load)
+cargo run -- -c "SELECT 1" --bench --concurrent
+
+# Concurrent benchmark with a custom iteration count
+cargo run -- -c "SELECT 1" --bench -n 100 --concurrent
+
+# Save results to CSV
+cargo run -- -c "SELECT 1" --bench --save results.csv
+
+# Append to existing results
+cargo run -- -c "SELECT 2" --bench --concurrent --save results.csv --append
+
+# Run a setup statement before benchmarking (here: create the table being queried)
+cargo run -- -c "SELECT * FROM t" --bench --run-before "CREATE TABLE t AS VALUES (1)"
+```
+
+**Benchmark Modes:**
+- **Serial** (default): Measures query performance in isolation
+  - Shows pure query execution time without contention
+  - Ideal for understanding baseline performance
+
+- **Concurrent** (`--concurrent`): Measures performance under load
+  - Runs iterations in parallel (concurrency = min(iterations, CPU cores))
+  - Shows throughput (queries/second) with multiple clients
+  - Reveals resource contention and bottlenecks
+  - Higher mean/median times are expected due to concurrent load
+
+**Output:**
+- Timing breakdown: logical planning, physical planning, execution, total
+- Statistics: min, max, mean, median for each phase
+- CSV output includes a `concurrency_mode` column (serial or concurrent(N))
+
+**FlightSQL Benchmarks:**
+```bash
+# Benchmark a FlightSQL server (requires the --flightsql flag and a running server)
+cargo run -- -c "SELECT 1" --bench --flightsql --concurrent
+```
+
 ### Testing
 
 Tests are organized by feature and component:
 
@@ -68,6 +68,12 @@ dft -f query.sql
 # Benchmark a query (with stats)
 dft -c "SELECT * FROM my_table" --bench
 
+# Concurrent benchmark (measures throughput under load)
+dft -c "SELECT * FROM my_table" --bench --concurrent
+
+# Save benchmark results to CSV
+dft -c "SELECT * FROM my_table" --bench --save results.csv
+
 # Start FlightSQL Server (requires `flightsql` feature)
 dft serve-flightsql
 
@@ -78,6 +84,39 @@ dft serve-http
 dft generate-tpch
 ```
 
+### Benchmarking
+
+`dft` includes built-in benchmarking to measure query performance with detailed timing breakdowns:
+
+```sh
+# Serial benchmark (default) - measures query performance in isolation
+dft -c "SELECT * FROM my_table" --bench
+
+# Concurrent benchmark - measures throughput under load
+dft -c "SELECT * FROM my_table" --bench --concurrent
+
+# Custom iteration count
+dft -c "SELECT * FROM my_table" --bench -n 100
+
+# Save results to CSV for analysis
+dft -c "SELECT * FROM my_table" --bench --save results.csv
+
+# Compare serial vs concurrent performance
+dft -c "SELECT * FROM my_table" --bench --save results.csv
+dft -c "SELECT * FROM my_table" --bench --concurrent --save results.csv --append
+```
+
+**Benchmark Output:**
+- Timing breakdown by phase: logical planning, physical planning, execution
+- Statistics: min, max, mean, median for each phase
+- Row count validation across all runs
+- CSV export with `concurrency_mode` column for result comparison
+
+**Serial vs Concurrent:**
+- **Serial**: Pure query execution time without contention (baseline performance)
+- **Concurrent**: Throughput measurement with parallel execution (reveals bottlenecks and contention)
+- Concurrent mode uses adaptive concurrency: `min(iterations, CPU cores)`
+
 ### Setting Up Tables with DDL
 
 `dft` can automatically load table definitions at startup, giving you a persistent "database-like" experience.
 
@@ -43,7 +43,8 @@ use tokio_stream::StreamExt;
 use tonic::{transport::Channel, IntoRequest};
 
 use crate::{
-    config::FlightSQLConfig, flightsql_benchmarks::FlightSQLBenchmarkStats, ExecOptions, ExecResult,
+    config::FlightSQLConfig, flightsql_benchmarks::FlightSQLBenchmarkStats,
+    local_benchmarks::BenchmarkMode, ExecOptions, ExecResult,
 };
 
 pub type FlightSQLClient = Arc<Mutex<Option<FlightSqlServiceClient<Channel>>>>;
@@ -120,70 +121,158 @@ impl FlightSQLContext {
         &self,
         query: &str,
         cli_iterations: Option<usize>,
+        concurrent: bool,
     ) -> Result<FlightSQLBenchmarkStats> {
         let iterations = cli_iterations.unwrap_or(self.config.benchmark_iterations);
+        let dialect = datafusion::sql::sqlparser::dialect::GenericDialect {};
+        let statements = DFParser::parse_sql_with_dialect(query, &dialect)?;
+
+        if statements.len() != 1 {
+            return Err(eyre::eyre!("Only a single statement can be benchmarked"));
+        }
+
+        // Check that client exists
+        {
+            let guard = self.client.lock().await;
+            if guard.is_none() {
+                return Err(eyre::eyre!("No FlightSQL client configured"));
+            }
+        }
+
+        let concurrency = if concurrent {
+            std::cmp::min(iterations, num_cpus::get())
+        } else {
+            1
+        };
+        let mode = if concurrent {
+            BenchmarkMode::Concurrent(concurrency)
+        } else {
+            BenchmarkMode::Serial
+        };
+
+        info!(
+            "Benchmarking FlightSQL query with {} iterations (concurrency: {})",
+            iterations, concurrency
+        );
+
         let mut rows_returned = Vec::with_capacity(iterations);
         let mut get_flight_info_durations = Vec::with_capacity(iterations);
         let mut ttfb_durations = Vec::with_capacity(iterations);
         let mut do_get_durations = Vec::with_capacity(iterations);
         let mut total_durations = Vec::with_capacity(iterations);
 
-        let dialect = datafusion::sql::sqlparser::dialect::GenericDialect {};
-        let statements = DFParser::parse_sql_with_dialect(query, &dialect)?;
-        if statements.len() == 1 {
-            if let Some(ref mut client) = *self.client.lock().await {
+        if !concurrent {
+            // Serial execution
+            let mut guard = self.client.lock().await;
+            if let Some(ref mut client) = *guard {
                 for _ in 0..iterations {
-                    let mut rows = 0;
-                    let start = std::time::Instant::now();
-                    let flight_info = client.execute(query.to_string(), None).await?;
-                    if flight_info.endpoint.len() > 1 {
-                        warn!("More than one endpoint: Benchmark results will not be reliable");
-                    }
-                    let get_flight_info_duration = start.elapsed();
-                    // Current logic wont properly handle having multiple endpoints
-                    for endpoint in flight_info.endpoint {
-                        if let Some(ticket) = &endpoint.ticket {
-                            match client.do_get(ticket.clone().into_request()).await {
-                                Ok(ref mut s) => {
-                                    let mut batch_count = 0;
-                                    while let Some(b) = s.next().await {
-                                        rows += b?.num_rows();
-                                        if batch_count == 0 {
-                                            let ttfb_duration =
-                                                start.elapsed() - get_flight_info_duration;
-                                            ttfb_durations.push(ttfb_duration);
-                                        }
-                                        batch_count += 1;
-                                    }
-                                    let do_get_duration =
-                                        start.elapsed() - get_flight_info_duration;
-                                    do_get_durations.push(do_get_duration);
-                                }
-                                Err(e) => {
-                                    error!("Error getting Flight stream: {:?}", e);
-                                }
+                    let (rows, gfi_dur, ttfb_dur, dg_dur, total_dur) =
+                        Self::benchmark_single_iteration(client, query).await?;
+                    rows_returned.push(rows);
+                    get_flight_info_durations.push(gfi_dur);
+                    ttfb_durations.push(ttfb_dur);
+                    do_get_durations.push(dg_dur);
+                    total_durations.push(total_dur);
+                }
+            }
+        } else {
+            // Concurrent execution - spawn tasks that share the client
+            let mut completed = 0;
+
+            while completed < iterations {
+                let batch_size = std::cmp::min(concurrency, iterations - completed);
+                let mut join_set = tokio::task::JoinSet::new();
+
+                for _ in 0..batch_size {
+                    let client = Arc::clone(&self.client);
+                    let query_str = query.to_string();
+
+                    join_set.spawn(async move {
+                        let mut guard = client.lock().await;
+                        if let Some(ref mut c) = *guard {
+                            Self::benchmark_single_iteration(c, &query_str).await
+                        } else {
+                            Err(eyre::eyre!("No FlightSQL client configured"))
+                        }
+                    });
+                }
+
+                while let Some(result) = join_set.join_next().await {
+                    let (rows, gfi_dur, ttfb_dur, dg_dur, total_dur) = result??;
+                    rows_returned.push(rows);
+                    get_flight_info_durations.push(gfi_dur);
+                    ttfb_durations.push(ttfb_dur);
+                    do_get_durations.push(dg_dur);
+                    total_durations.push(total_dur);
+                }
+
+                completed += batch_size;
+            }
+        }
+
+        Ok(FlightSQLBenchmarkStats::new(
+            query.to_string(),
+            rows_returned,
+            mode,
+            get_flight_info_durations,
+            ttfb_durations,
+            do_get_durations,
+            total_durations,
+        ))
+    }
+
+    async fn benchmark_single_iteration(
+        client: &mut FlightSqlServiceClient<Channel>,
+        query: &str,
+    ) -> Result<(
+        usize,
+        std::time::Duration,
+        std::time::Duration,
+        std::time::Duration,
+        std::time::Duration,
+    )> {
+        let mut rows = 0;
+        let start = std::time::Instant::now();
+        let flight_info = client.execute(query.to_string(), None).await?;
+
+        if flight_info.endpoint.len() > 1 {
+            warn!("More than one endpoint: Benchmark results will not be reliable");
+        }
+
+        let get_flight_info_duration = start.elapsed();
+        let mut ttfb_duration = std::time::Duration::from_secs(0);
+        let mut do_get_duration = std::time::Duration::from_secs(0);
+
+        for endpoint in flight_info.endpoint {
+            if let Some(ticket) = &endpoint.ticket {
+                match client.do_get(ticket.clone().into_request()).await {
+                    Ok(ref mut s) => {
+                        let mut batch_count = 0;
+                        while let Some(b) = s.next().await {
+                            rows += b?.num_rows();
+                            if batch_count == 0 {
+                                ttfb_duration = start.elapsed() - get_flight_info_duration;
                             }
+                            batch_count += 1;
                         }
+                        do_get_duration = start.elapsed() - get_flight_info_duration;
+                    }
+                    Err(e) => {
+                        error!("Error getting Flight stream: {:?}", e);
+                        return Err(e.into());
                     }
-                    rows_returned.push(rows);
-                    get_flight_info_durations.push(get_flight_info_duration);
-                    let total_duration = start.elapsed();
-                    total_durations.push(total_duration);
                 }
-            } else {
-                return Err(eyre::eyre!("No FlightSQL client configured"));
             }
-            Ok(FlightSQLBenchmarkStats::new(
-                query.to_string(),
-                rows_returned,
-                get_flight_info_durations,
-                ttfb_durations,
-                do_get_durations,
-                total_durations,
-            ))
-        } else {
-            Err(eyre::eyre!("Only a single statement can be benchmarked"))
         }
+
+        let total_duration = start.elapsed();
+        Ok((
+            rows,
+            get_flight_info_duration,
+            ttfb_duration,
+            do_get_duration,
+            total_duration,
+        ))
     }
 
     pub async fn execute_sql_with_opts(
 
@@ -19,11 +19,13 @@ use std::time::Duration;
 
 use crate::local_benchmarks::is_all_same;
 
+use crate::local_benchmarks::BenchmarkMode;
 use crate::local_benchmarks::DurationsSummary;
 
 pub struct FlightSQLBenchmarkStats {
     query: String,
     runs: usize,
+    mode: BenchmarkMode,
     rows: Vec<usize>,
     get_flight_info_durations: Vec<Duration>,
     ttfb_durations: Vec<Duration>,
@@ -35,6 +37,7 @@ impl FlightSQLBenchmarkStats {
     pub fn new(
         query: String,
         rows: Vec<usize>,
+        mode: BenchmarkMode,
         get_flight_info_durations: Vec<Duration>,
         ttfb_durations: Vec<Duration>,
         do_get_durations: Vec<Duration>,
@@ -44,6 +47,7 @@ impl FlightSQLBenchmarkStats {
         Self {
             query,
             runs,
+            mode,
             rows,
             get_flight_info_durations,
             ttfb_durations,
@@ -103,6 +107,8 @@ impl FlightSQLBenchmarkStats {
         csv.push_str(execution_summary.to_csv_fields().as_str());
         csv.push(',');
         csv.push_str(total_summary.to_csv_fields().as_str());
+        csv.push(',');
+        csv.push_str(&self.mode.to_string());
         csv
     }
 }
@@ -111,7 +117,7 @@ impl std::fmt::Display for FlightSQLBenchmarkStats {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         writeln!(f)?;
         writeln!(f, "----------------------------")?;
-        writeln!(f, "Benchmark Stats ({} runs)", self.runs)?;
+        writeln!(f, "Benchmark Stats ({} runs, {})", self.runs, self.mode)?;
         writeln!(f, "----------------------------")?;
         writeln!(f, "{}", self.query)?;
         writeln!(f, "----------------------------")?;