@@ -737,3 +737,108 @@ DROP TABLE t_union_mem;
737737
738738statement ok
739739DROP TABLE t_union_parquet;
740+
741+ # Cleanup settings: set optimizer max_passes to 3 and turn parquet filter pushdown off
742+ statement ok
743+ set datafusion.optimizer.max_passes = 3;
744+
745+ statement ok
746+ set datafusion.execution.parquet.pushdown_filters = false;
747+
748+
749+ # Regression test for https://github.com/apache/datafusion/issues/20696
750+ # Multi-column INNER JOIN on dictionary-encoded columns fails
751+ # when parquet pushdown filters are enabled.
752+ # Setup: write two parquet files (h2o: 4 rows, o2: 2 rows) whose state and city
753+ # columns are cast to Dictionary(Int32, Utf8), then register each directory as a table.
754+ statement ok
755+ COPY (
756+ SELECT
757+ to_timestamp_nanos(time_ns) AS time,
758+ arrow_cast(state, 'Dictionary(Int32, Utf8)') AS state,
759+ arrow_cast(city, 'Dictionary(Int32, Utf8)') AS city,
760+ temp
761+ FROM (
762+ VALUES
763+ (200, 'CA', 'LA', 90.0),
764+ (250, 'MA', 'Boston', 72.4),
765+ (100, 'MA', 'Boston', 70.4),
766+ (350, 'CA', 'LA', 90.0)
767+ ) AS t(time_ns, state, city, temp)
768+ )
769+ TO 'test_files/scratch/parquet_filter_pushdown/issue_20696/h2o/data.parquet';
770+
771+ statement ok
772+ COPY (
773+ SELECT
774+ to_timestamp_nanos(time_ns) AS time,
775+ arrow_cast(state, 'Dictionary(Int32, Utf8)') AS state,
776+ arrow_cast(city, 'Dictionary(Int32, Utf8)') AS city,
777+ temp,
778+ reading
779+ FROM (
780+ VALUES
781+ (250, 'MA', 'Boston', 53.4, 51.0),
782+ (100, 'MA', 'Boston', 50.4, 50.0)
783+ ) AS t(time_ns, state, city, temp, reading)
784+ )
785+ TO 'test_files/scratch/parquet_filter_pushdown/issue_20696/o2/data.parquet';
786+
787+ statement ok
788+ CREATE EXTERNAL TABLE h2o_parquet_20696 STORED AS PARQUET
789+ LOCATION 'test_files/scratch/parquet_filter_pushdown/issue_20696/h2o/';
790+
791+ statement ok
792+ CREATE EXTERNAL TABLE o2_parquet_20696 STORED AS PARQUET
793+ LOCATION 'test_files/scratch/parquet_filter_pushdown/issue_20696/o2/';
794+
795+ # The join must return the same rows with filter pushdown off and on; rowsort keeps the check independent of join output order
796+ statement ok
797+ set datafusion.execution.parquet.pushdown_filters = false;
798+
799+ query RRR rowsort
800+ SELECT
801+ h2o_parquet_20696.temp AS h2o_temp,
802+ o2_parquet_20696.temp AS o2_temp,
803+ o2_parquet_20696.reading
804+ FROM h2o_parquet_20696
805+ INNER JOIN o2_parquet_20696
806+ ON h2o_parquet_20696.time = o2_parquet_20696.time
807+ AND h2o_parquet_20696.state = o2_parquet_20696.state
808+ AND h2o_parquet_20696.city = o2_parquet_20696.city
809+ WHERE h2o_parquet_20696.time >= '1970-01-01T00:00:00.000000050Z'
810+ AND h2o_parquet_20696.time <= '1970-01-01T00:00:00.000000300Z';
811+ ----
812+ 70.4 50.4 50
813+ 72.4 53.4 51
814+
815+ # Same join with parquet filter pushdown enabled (the failing case from the issue); rowsort keeps the check order-independent
816+ statement ok
817+ set datafusion.execution.parquet.pushdown_filters = true;
818+
819+ query RRR rowsort
820+ SELECT
821+ h2o_parquet_20696.temp AS h2o_temp,
822+ o2_parquet_20696.temp AS o2_temp,
823+ o2_parquet_20696.reading
824+ FROM h2o_parquet_20696
825+ INNER JOIN o2_parquet_20696
826+ ON h2o_parquet_20696.time = o2_parquet_20696.time
827+ AND h2o_parquet_20696.state = o2_parquet_20696.state
828+ AND h2o_parquet_20696.city = o2_parquet_20696.city
829+ WHERE h2o_parquet_20696.time >= '1970-01-01T00:00:00.000000050Z'
830+ AND h2o_parquet_20696.time <= '1970-01-01T00:00:00.000000300Z';
831+ ----
832+ 70.4 50.4 50
833+ 72.4 53.4 51
834+
835+ # Cleanup: drop the two external tables registered for the issue 20696 regression test
836+ statement ok
837+ DROP TABLE h2o_parquet_20696;
838+
839+ statement ok
840+ DROP TABLE o2_parquet_20696;
841+
842+ # Cleanup settings: turn parquet filter pushdown back off (it was enabled for the query above)
843+ statement ok
844+ set datafusion.execution.parquet.pushdown_filters = false;
0 commit comments