@@ -303,6 +303,198 @@ const HASH_QUERIES: &[HashJoinQuery] = &[
303303 build_size : "100K_(20%_dups)" ,
304304 probe_size : "60M" ,
305305 } ,
306+ // RightSemi Join benchmarks with Int32 keys
307+ // Q16: RightSemi, 100% Density, 100% Hit rate
308+ HashJoinQuery {
309+ sql : r###"SELECT l.k
310+ FROM (
311+ SELECT CAST(l_suppkey AS INT) as k FROM lineitem
312+ ) l
313+ WHERE EXISTS (
314+ SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
315+ )"### ,
316+ density : 1.0 ,
317+ prob_hit : 1.0 ,
318+ build_size : "100K" ,
319+ probe_size : "60M_RightSemi" ,
320+ } ,
321+ // Q17: RightSemi, 100% Density, 10% Hit rate
322+ HashJoinQuery {
323+ sql : r###"SELECT l.k
324+ FROM (
325+ SELECT CAST(CASE WHEN l_suppkey % 10 = 0 THEN l_suppkey ELSE l_suppkey + 1000000 END AS INT) as k
326+ FROM lineitem
327+ ) l
328+ WHERE EXISTS (
329+ SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
330+ )"### ,
331+ density : 1.0 ,
332+ prob_hit : 0.1 ,
333+ build_size : "100K" ,
334+ probe_size : "60M_RightSemi" ,
335+ } ,
336+ // Q18: RightSemi, 50% Density, 100% Hit rate
337+ HashJoinQuery {
338+ sql : r###"SELECT l.k
339+ FROM (
340+ SELECT CAST(l_suppkey * 2 AS INT) as k FROM lineitem
341+ ) l
342+ WHERE EXISTS (
343+ SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
344+ )"### ,
345+ density : 0.5 ,
346+ prob_hit : 1.0 ,
347+ build_size : "100K" ,
348+ probe_size : "60M_RightSemi" ,
349+ } ,
350+ // Q19: RightSemi, 50% Density, 10% Hit rate
351+ HashJoinQuery {
352+ sql : r###"SELECT l.k
353+ FROM (
354+ SELECT CAST(CASE
355+ WHEN l_suppkey % 10 = 0 THEN l_suppkey * 2
356+ WHEN l_suppkey % 10 < 9 THEN l_suppkey * 2 + 1
357+ ELSE l_suppkey * 2 + 1000000
358+ END AS INT) as k
359+ FROM lineitem
360+ ) l
361+ WHERE EXISTS (
362+ SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
363+ )"### ,
364+ density : 0.5 ,
365+ prob_hit : 0.1 ,
366+ build_size : "100K" ,
367+ probe_size : "60M_RightSemi" ,
368+ } ,
369+ // Q20: RightSemi, 10% Density, 100% Hit rate
370+ HashJoinQuery {
371+ sql : r###"SELECT l.k
372+ FROM (
373+ SELECT CAST(l_suppkey * 10 AS INT) as k FROM lineitem
374+ ) l
375+ WHERE EXISTS (
376+ SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
377+ )"### ,
378+ density : 0.1 ,
379+ prob_hit : 1.0 ,
380+ build_size : "100K" ,
381+ probe_size : "60M_RightSemi" ,
382+ } ,
383+ // Q21: RightSemi, 10% Density, 10% Hit rate
384+ HashJoinQuery {
385+ sql : r###"SELECT l.k
386+ FROM (
387+ SELECT CAST(CASE
388+ WHEN l_suppkey % 10 = 0 THEN l_suppkey * 10
389+ WHEN l_suppkey % 10 < 9 THEN l_suppkey * 10 + 1
390+ ELSE l_suppkey * 10 + 1000000
391+ END AS INT) as k
392+ FROM lineitem
393+ ) l
394+ WHERE EXISTS (
395+ SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
396+ )"### ,
397+ density : 0.1 ,
398+ prob_hit : 0.1 ,
399+ build_size : "100K" ,
400+ probe_size : "60M_RightSemi" ,
401+ } ,
402+ // RightAnti Join benchmarks with Int32 keys
403+ // Q22: RightAnti, 100% Density, 100% Hit rate (no output)
404+ HashJoinQuery {
405+ sql : r###"SELECT l.k
406+ FROM (
407+ SELECT CAST(l_suppkey AS INT) as k FROM lineitem
408+ ) l
409+ WHERE NOT EXISTS (
410+ SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
411+ )"### ,
412+ density : 1.0 ,
413+ prob_hit : 1.0 ,
414+ build_size : "100K" ,
415+ probe_size : "60M_RightAnti" ,
416+ } ,
417+ // Q23: RightAnti, 100% Density, 10% Hit rate (90% output)
418+ HashJoinQuery {
419+ sql : r###"SELECT l.k
420+ FROM (
421+ SELECT CAST(CASE WHEN l_suppkey % 10 = 0 THEN l_suppkey ELSE l_suppkey + 1000000 END AS INT) as k
422+ FROM lineitem
423+ ) l
424+ WHERE NOT EXISTS (
425+ SELECT 1 FROM (SELECT CAST(s_suppkey AS INT) as k FROM supplier) s WHERE s.k = l.k
426+ )"### ,
427+ density : 1.0 ,
428+ prob_hit : 0.1 ,
429+ build_size : "100K" ,
430+ probe_size : "60M_RightAnti" ,
431+ } ,
432+ // Q24: RightAnti, 50% Density, 100% Hit rate (no output)
433+ HashJoinQuery {
434+ sql : r###"SELECT l.k
435+ FROM (
436+ SELECT CAST(l_suppkey * 2 AS INT) as k FROM lineitem
437+ ) l
438+ WHERE NOT EXISTS (
439+ SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
440+ )"### ,
441+ density : 0.5 ,
442+ prob_hit : 1.0 ,
443+ build_size : "100K" ,
444+ probe_size : "60M_RightAnti" ,
445+ } ,
446+ // Q25: RightAnti, 50% Density, 10% Hit rate (90% output)
447+ HashJoinQuery {
448+ sql : r###"SELECT l.k
449+ FROM (
450+ SELECT CAST(CASE
451+ WHEN l_suppkey % 10 = 0 THEN l_suppkey * 2
452+ WHEN l_suppkey % 10 < 9 THEN l_suppkey * 2 + 1
453+ ELSE l_suppkey * 2 + 1000000
454+ END AS INT) as k
455+ FROM lineitem
456+ ) l
457+ WHERE NOT EXISTS (
458+ SELECT 1 FROM (SELECT CAST(s_suppkey * 2 AS INT) as k FROM supplier) s WHERE s.k = l.k
459+ )"### ,
460+ density : 0.5 ,
461+ prob_hit : 0.1 ,
462+ build_size : "100K" ,
463+ probe_size : "60M_RightAnti" ,
464+ } ,
465+ // Q26: RightAnti, 10% Density, 100% Hit rate (no output)
466+ HashJoinQuery {
467+ sql : r###"SELECT l.k
468+ FROM (
469+ SELECT CAST(l_suppkey * 10 AS INT) as k FROM lineitem
470+ ) l
471+ WHERE NOT EXISTS (
472+ SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
473+ )"### ,
474+ density : 0.1 ,
475+ prob_hit : 1.0 ,
476+ build_size : "100K" ,
477+ probe_size : "60M_RightAnti" ,
478+ } ,
479+ // Q27: RightAnti, 10% Density, 10% Hit rate (90% output)
480+ HashJoinQuery {
481+ sql : r###"SELECT l.k
482+ FROM (
483+ SELECT CAST(CASE
484+ WHEN l_suppkey % 10 = 0 THEN l_suppkey * 10
485+ WHEN l_suppkey % 10 < 9 THEN l_suppkey * 10 + 1
486+ ELSE l_suppkey * 10 + 1000000
487+ END AS INT) as k
488+ FROM lineitem
489+ ) l
490+ WHERE NOT EXISTS (
491+ SELECT 1 FROM (SELECT CAST(s_suppkey * 10 AS INT) as k FROM supplier) s WHERE s.k = l.k
492+ )"### ,
493+ density : 0.1 ,
494+ prob_hit : 0.1 ,
495+ build_size : "100K" ,
496+ probe_size : "60M_RightAnti" ,
497+ } ,
306498] ;
307499
308500impl RunOpt {
0 commit comments