Add test for runtime memory limiting

alamb · alamb · commit 50b3509836a4 · 2022-11-28T13:32:48.000-05:00
diff --git a/datafusion/core/src/execution/memory_manager/proxy.rs b/datafusion/core/src/execution/memory_manager/proxy.rs
@@ -88,9 +88,10 @@ impl MemoryConsumer for MemoryConsumerProxy {
     }
 
     async fn spill(&self) -> Result<usize, DataFusionError> {
-        Err(DataFusionError::ResourcesExhausted(
-            "Cannot spill AggregationState".to_owned(),
-        ))
+        Err(DataFusionError::ResourcesExhausted(format!(
+            "Cannot spill {}",
+            self.name
+        )))
     }
 
     fn mem_used(&self) -> usize {
diff --git a/datafusion/core/src/physical_plan/aggregates/hash.rs b/datafusion/core/src/physical_plan/aggregates/hash.rs
@@ -135,7 +135,7 @@ impl GroupedHashAggregateStream {
             aggregate_expressions,
             accumulators: Accumulators {
                 memory_consumer: MemoryConsumerProxy::new(
-                    "Accumulators",
+                    "GroupBy Hash Accumulators",
                     MemoryConsumerId::new(partition),
                     Arc::clone(&context.runtime_env().memory_manager),
                 ),
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
@@ -144,7 +144,7 @@ impl GroupedHashAggregateStreamV2 {
 
         let aggr_state = AggregationState {
             memory_consumer: MemoryConsumerProxy::new(
-                "AggregationState",
+                "GroupBy Hash (Row) AggregationState",
                 MemoryConsumerId::new(partition),
                 Arc::clone(&context.runtime_env().memory_manager),
             ),
diff --git a/datafusion/core/src/physical_plan/sorts/sort.rs b/datafusion/core/src/physical_plan/sorts/sort.rs
@@ -118,6 +118,7 @@ impl ExternalSorter {
     ) -> Result<()> {
         if input.num_rows() > 0 {
             let size = batch_byte_size(&input);
+            debug!("Inserting {} rows of {} bytes", input.num_rows(), size);
             self.try_grow(size).await?;
             self.metrics.mem_used().add(size);
             let mut in_mem_batches = self.in_mem_batches.lock().await;
@@ -272,6 +273,13 @@ impl MemoryConsumer for ExternalSorter {
     }
 
     async fn spill(&self) -> Result<usize> {
+        let partition = self.partition_id();
+        let mut in_mem_batches = self.in_mem_batches.lock().await;
+        // we could always get a chance to free some memory as long as we are holding some
+        if in_mem_batches.len() == 0 {
+            return Ok(0);
+        }
+
         debug!(
             "{}[{}] spilling sort data of {} to disk while inserting ({} time(s) so far)",
             self.name(),
@@ -280,13 +288,6 @@ impl MemoryConsumer for ExternalSorter {
             self.spill_count()
         );
 
-        let partition = self.partition_id();
-        let mut in_mem_batches = self.in_mem_batches.lock().await;
-        // we could always get a chance to free some memory as long as we are holding some
-        if in_mem_batches.len() == 0 {
-            return Ok(0);
-        }
-
         let tracking_metrics = self
             .metrics_set
             .new_intermediate_tracking(partition, self.runtime.clone());
diff --git a/datafusion/core/tests/memory_limit.rs b/datafusion/core/tests/memory_limit.rs
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! This module contains tests for limiting memory at runtime in DataFusion
+
+use std::sync::Arc;
+
+use arrow::record_batch::RecordBatch;
+use datafusion::datasource::MemTable;
+use datafusion::execution::disk_manager::DiskManagerConfig;
+use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
+use datafusion_common::assert_contains;
+
+use datafusion::prelude::{SessionConfig, SessionContext};
+use test_utils::{stagger_batch, AccessLogGenerator};
+
+#[cfg(test)]
+#[ctor::ctor]
+fn init() {
+    let _ = env_logger::try_init();
+}
+
+#[tokio::test]
+async fn oom_sort() {
+    run_limit_test(
+        "select * from t order by host DESC",
+        "Resources exhausted: Memory Exhausted while Sorting (DiskManager is disabled)",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn group_by_none() {
+    run_limit_test(
+        "select median(image) from t",
+        "Resources exhausted: Cannot spill AggregationState",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn group_by_row_hash() {
+    run_limit_test(
+        "select count(*) from t GROUP BY response_bytes",
+        "Resources exhausted: Cannot spill GroupBy Hash (Row) AggregationState",
+    )
+    .await
+}
+
+#[tokio::test]
+async fn group_by_hash() {
+    run_limit_test(
+        // group by dict column
+        "select count(*) from t GROUP BY service, host, pod, container",
+        "Resources exhausted: Cannot spill GroupBy Hash Accumulators",
+    )
+    .await
+}
+
+/// 50 byte memory limit
+const MEMORY_LIMIT_BYTES: usize = 50;
+const MEMORY_FRACTION: f64 = 0.95;
+
+/// runs the specified query against 1000 rows with a 50
+/// byte memory limit and no disk manager enabled.
+async fn run_limit_test(query: &str, expected_error: &str) {
+    let generator = AccessLogGenerator::new().with_row_limit(Some(1000));
+
+    let batches: Vec<RecordBatch> = generator
+        // split up into more than one batch, as the size limit in sort is not enforced until the second batch
+        .flat_map(stagger_batch)
+        .collect();
+
+    let table = MemTable::try_new(batches[0].schema(), vec![batches]).unwrap();
+
+    let rt_config = RuntimeConfig::new()
+        // do not allow spilling
+        .with_disk_manager(DiskManagerConfig::Disabled)
+        // Only allow 50 bytes
+        .with_memory_limit(MEMORY_LIMIT_BYTES, MEMORY_FRACTION);
+
+    let runtime = RuntimeEnv::new(rt_config).unwrap();
+
+    let ctx = SessionContext::with_config_rt(SessionConfig::new(), Arc::new(runtime));
+    ctx.register_table("t", Arc::new(table))
+        .expect("registering table");
+
+    let df = ctx.sql(query).await.expect("Planning query");
+
+    match df.collect().await {
+        Ok(_batches) => {
+            panic!("Unexpected success when running, expected memory limit failure")
+        }
+        Err(e) => {
+            assert_contains!(e.to_string(), expected_error);
+        }
+    }
+}
diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs
@@ -19,7 +19,7 @@
 use arrow::record_batch::RecordBatch;
 use datafusion_common::cast::as_int32_array;
 use rand::prelude::StdRng;
-use rand::Rng;
+use rand::{Rng, SeedableRng};
 
 mod data_gen;
 
@@ -68,3 +68,28 @@ pub fn add_empty_batches(
         })
         .collect()
 }
+
+/// "stagger" batches: split the batches into random sized batches
+pub fn stagger_batch(batch: RecordBatch) -> Vec<RecordBatch> {
+    let seed = 42;
+    stagger_batch_with_seed(batch, seed)
+}
+
+/// "stagger" batches: split the batches into random sized batches
+/// using the specified value for a rng seed
+pub fn stagger_batch_with_seed(batch: RecordBatch, seed: u64) -> Vec<RecordBatch> {
+    let mut batches = vec![];
+
+    // use a random number generator to pick a random sized output
+    let mut rng = StdRng::seed_from_u64(seed);
+
+    let mut remainder = batch;
+    while remainder.num_rows() > 0 {
+        let batch_size = rng.gen_range(0..remainder.num_rows() + 1);
+
+        batches.push(remainder.slice(0, batch_size));
+        remainder = remainder.slice(batch_size, remainder.num_rows() - batch_size);
+    }
+
+    batches
+}

Original file line number	Diff line number	Diff line change
`@@ -88,9 +88,10 @@ impl MemoryConsumer for MemoryConsumerProxy {`
`88`	`88`	`}`
`89`	`89`
`90`	`90`	`async fn spill(&self) -> Result<usize, DataFusionError> {`
`91`		`- Err(DataFusionError::ResourcesExhausted(`
`92`		`- "Cannot spill AggregationState".to_owned(),`
`93`		`- ))`
	`91`	`+ Err(DataFusionError::ResourcesExhausted(format!(`
	`92`	`+ "Cannot spill {}",`
	`93`	`+ self.name`
	`94`	`+ )))`
`94`	`95`	`}`
`95`	`96`
`96`	`97`	`fn mem_used(&self) -> usize {`