diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContext.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContext.java index a6e4f374813..cbf427bc6e3 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContext.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContext.java @@ -66,6 +66,16 @@ interface UnifiedChunkFilter extends SafeCloseable { */ List> columnSources(); + /** + * Whether this filter is a range filter. + */ + boolean isRangeFilter(); + + /** + * Whether this filter is a match filter. + */ + boolean isMatchFilter(); + /** * Whether this filter supports direct chunk filtering, i.e., it can be applied to a chunk of data rather than a * table. This includes any filter that implements {@link ExposesChunkFilter} or {@link ConditionFilter} with diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContextImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContextImpl.java index c69c75fa739..7e4d0f29626 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContextImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/BasePushdownFilterContextImpl.java @@ -60,19 +60,21 @@ public BasePushdownFilterContextImpl( "filter must be stateless, but does not permit parallelization: " + filter); } - this.filter = filter; this.columnSources = columnSources; - executedFilterCost = 0; + // Extract the effective filter and use it for populating the context. 
+ final WhereFilter effectiveFilter = WhereFilterDelegating.maybeUnwrapFilter(filter); + this.filter = effectiveFilter; - isRangeFilter = filter instanceof RangeFilter - && ((RangeFilter) filter).getRealFilter() instanceof AbstractRangeFilter; - isMatchFilter = filter instanceof MatchFilter && - ((MatchFilter) filter).getFailoverFilterIfCached() == null; + isRangeFilter = effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof AbstractRangeFilter; + isMatchFilter = effectiveFilter instanceof MatchFilter && + ((MatchFilter) effectiveFilter).getFailoverFilterIfCached() == null; - final Optional chunkFilter = ExposesChunkFilter.chunkFilter(filter); + final Optional chunkFilter = ExposesChunkFilter.chunkFilter(effectiveFilter); supportsChunkFiltering = chunkFilter.isPresent() - || (filter instanceof ConditionFilter && ((ConditionFilter) filter).getNumInputsUsed() == 1); + || (effectiveFilter instanceof ConditionFilter + && ((ConditionFilter) effectiveFilter).getNumInputsUsed() == 1); conditionalFilterInitTable = null; // lazily initialized filterNullBehavior = null; // lazily initialized @@ -104,6 +106,16 @@ public final List> columnSources() { return columnSources; } + @Override + public final boolean isRangeFilter() { + return isRangeFilter; + } + + @Override + public final boolean isMatchFilter() { + return isMatchFilter; + } + /** * Whether this filter supports direct chunk filtering, i.e., it can be applied to a chunk of data rather than a * table. 
This includes any filter that implements {@link ExposesChunkFilter} or {@link ConditionFilter} with diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java index 8cb69f329ac..de6a61a339d 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java @@ -290,6 +290,13 @@ public interface MemoizableOperation getDependencyStream() { return Stream.empty(); } + @Override public WhereFilter getWrappedFilter() { return filter; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterInvertedImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterInvertedImpl.java index 96a66905ebd..cc486388472 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterInvertedImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterInvertedImpl.java @@ -23,6 +23,12 @@ private WhereFilterInvertedImpl(WhereFilter filter) { super(filter); } + @Override + public WhereFilter maybeUnwrapFilter() { + // This filter inverts the results of the wrapped filter so we can't unwrap. 
+ return this; + } + @NotNull @Override public WritableRowSet filter( @@ -47,11 +53,6 @@ public String toString() { return "not(" + filter + ")"; } - @VisibleForTesting - WhereFilter filter() { - return filter; - } - @Override public final T walk(Visitor visitor) { return visitor.visit(this); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterSerialImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterSerialImpl.java index 72811ebe9b6..cddc5bee18e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterSerialImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterSerialImpl.java @@ -21,6 +21,15 @@ private WhereFilterSerialImpl(WhereFilter filter) { super(filter); } + @Override + public WhereFilter maybeUnwrapFilter() { + if (filter instanceof WhereFilterDelegating) { + // Delegate to the wrapped filter to find the effective wrapped filter. + return ((WhereFilterDelegating) filter).maybeUnwrapFilter(); + } + return filter; + } + /** * Always returns {@code false} to indicate that parallelization is not permitted. * diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithDeclaredBarriersImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithDeclaredBarriersImpl.java index 601e6751e9f..6c53b32ad20 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithDeclaredBarriersImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithDeclaredBarriersImpl.java @@ -30,6 +30,15 @@ private WhereFilterWithDeclaredBarriersImpl( this.declaredBarriers = declaredBarriers; } + @Override + public WhereFilter maybeUnwrapFilter() { + if (filter instanceof WhereFilterDelegating) { + // Delegate to the wrapped filter to find the effective wrapped filter. 
+ return ((WhereFilterDelegating) filter).maybeUnwrapFilter(); + } + return filter; + } + public Object[] declaredBarriers() { return declaredBarriers; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithRespectedBarriersImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithRespectedBarriersImpl.java index f1e2eeddfde..11718273113 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithRespectedBarriersImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilterWithRespectedBarriersImpl.java @@ -31,6 +31,15 @@ private WhereFilterWithRespectedBarriersImpl( this.respectedBarriers = respectedBarriers; } + @Override + public WhereFilter maybeUnwrapFilter() { + if (filter instanceof WhereFilterDelegating) { + // Delegate to the wrapped filter to find the effective wrapped filter. + return ((WhereFilterDelegating) filter).maybeUnwrapFilter(); + } + return filter; + } + public Object[] respectedBarriers() { return respectedBarriers; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernel.java index c7310bda558..29d83621e2a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernel.java @@ -23,8 +23,8 @@ public class ByteRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. 
The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -55,11 +55,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final byte toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final byte toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key. + continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final byte toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -67,79 +87,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionByte region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final byte toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final byte min, + final byte max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. 
- long matchEnd = matchStart; - if (matchStart < lastKey && ByteComparisons.eq(region.getByte(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a byte within a given range. The method returns the - * row key where the byte was found. If the byte is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. * * @param region The column region in which the search will be performed. - * @param toFind The byte to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
+ */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionByte region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final byte min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified byte was found. If not found, returns -1. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionByte region, - final byte toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 
1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final byte curVal = region.getByte(pivot); - final int comparison = ByteComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final byte max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionByte region, + final long firstKey, + final long lastKey, + final byte min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final byte midValue = region.getByte(mid); + final boolean satisfiesMin = minInc + ? 
ByteComparisons.geq(midValue, min) + : ByteComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionByte region, + final long firstKey, + final long lastKey, + final byte max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final byte midValue = region.getByte(mid); + final boolean satisfiesMax = maxInc + ? ByteComparisons.leq(midValue, max) + : ByteComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. 
+ */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionByte region, + final long firstKey, + final long lastKey, + final byte max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final byte midValue = region.getByte(mid); + final boolean satisfiesMax = maxInc + ? ByteComparisons.leq(midValue, max) + : ByteComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionByte region, + final long firstKey, + final long lastKey, + final byte min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final byte midValue = region.getByte(mid); + final boolean satisfiesMin = minInc + ? 
ByteComparisons.geq(midValue, min) + : ByteComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernel.java index 64504d5afa9..0b322ec4e2a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernel.java @@ -19,8 +19,8 @@ public class CharRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -51,11 +51,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final char toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final char toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key. 
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final char toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -63,79 +83,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionChar region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final char toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final char min, + final char max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && CharComparisons.eq(region.getChar(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a char within a given range. The method returns the - * row key where the char was found. If the char is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. 
* * @param region The column region in which the search will be performed. - * @param toFind The char to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionChar region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final char min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified char was found. If not found, returns -1. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionChar region, - final char toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final char curVal = region.getChar(pivot); - final int comparison = CharComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final char max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. 
+ * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionChar region, + final long firstKey, + final long lastKey, + final char min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final char midValue = region.getChar(mid); + final boolean satisfiesMin = minInc + ? CharComparisons.geq(midValue, min) + : CharComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionChar region, + final long firstKey, + final long lastKey, + final char max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final char midValue = region.getChar(mid); + final boolean satisfiesMax = maxInc + ? CharComparisons.leq(midValue, max) + : CharComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. 
+ * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionChar region, + final long firstKey, + final long lastKey, + final char max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final char midValue = region.getChar(mid); + final boolean satisfiesMax = maxInc + ? CharComparisons.leq(midValue, max) + : CharComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionChar region, + final long firstKey, + final long lastKey, + final char min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final char midValue = region.getChar(mid); + final boolean satisfiesMin = minInc + ? 
CharComparisons.geq(midValue, min) + : CharComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernel.java index 2995a16ac60..fd27b2ef075 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernel.java @@ -23,8 +23,8 @@ public class DoubleRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -55,11 +55,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final double toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final double toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key. 
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final double toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -67,79 +87,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionDouble region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final double toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final double min, + final double max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && DoubleComparisons.eq(region.getDouble(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a double within a given range. The method returns the - * row key where the double was found. If the double is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. 
* * @param region The column region in which the search will be performed. - * @param toFind The double to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionDouble region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final double min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified double was found. If not found, returns -1. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionDouble region, - final double toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final double curVal = region.getDouble(pivot); - final int comparison = DoubleComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final double max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. 
+ * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionDouble region, + final long firstKey, + final long lastKey, + final double min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final double midValue = region.getDouble(mid); + final boolean satisfiesMin = minInc + ? DoubleComparisons.geq(midValue, min) + : DoubleComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionDouble region, + final long firstKey, + final long lastKey, + final double max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final double midValue = region.getDouble(mid); + final boolean satisfiesMax = maxInc + ? DoubleComparisons.leq(midValue, max) + : DoubleComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. 
+ * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionDouble region, + final long firstKey, + final long lastKey, + final double max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final double midValue = region.getDouble(mid); + final boolean satisfiesMax = maxInc + ? DoubleComparisons.leq(midValue, max) + : DoubleComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionDouble region, + final long firstKey, + final long lastKey, + final double min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final double midValue = region.getDouble(mid); + final boolean satisfiesMin = minInc + ? 
DoubleComparisons.geq(midValue, min) + : DoubleComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernel.java index 3dac5c6ab32..3ded74246ed 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernel.java @@ -23,8 +23,8 @@ public class FloatRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -55,11 +55,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final float toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final float toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key. 
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final float toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -67,79 +87,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionFloat region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final float toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final float min, + final float max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && FloatComparisons.eq(region.getFloat(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a float within a given range. The method returns the - * row key where the float was found. If the float is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. 
* * @param region The column region in which the search will be performed. - * @param toFind The float to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionFloat region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final float min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified float was found. If not found, returns -1. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionFloat region, - final float toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final float curVal = region.getFloat(pivot); - final int comparison = FloatComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final float max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. 
+ * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionFloat region, + final long firstKey, + final long lastKey, + final float min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final float midValue = region.getFloat(mid); + final boolean satisfiesMin = minInc + ? FloatComparisons.geq(midValue, min) + : FloatComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionFloat region, + final long firstKey, + final long lastKey, + final float max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final float midValue = region.getFloat(mid); + final boolean satisfiesMax = maxInc + ? FloatComparisons.leq(midValue, max) + : FloatComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. 
+ * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionFloat region, + final long firstKey, + final long lastKey, + final float max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final float midValue = region.getFloat(mid); + final boolean satisfiesMax = maxInc + ? FloatComparisons.leq(midValue, max) + : FloatComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionFloat region, + final long firstKey, + final long lastKey, + final float min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final float midValue = region.getFloat(mid); + final boolean satisfiesMin = minInc + ? 
FloatComparisons.geq(midValue, min) + : FloatComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernel.java index da07622326d..d2e3627d4ef 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernel.java @@ -23,8 +23,8 @@ public class IntRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -55,11 +55,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final int toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final int toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key. 
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final int toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -67,79 +87,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionInt region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final int toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final int min, + final int max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && IntComparisons.eq(region.getInt(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a int within a given range. The method returns the - * row key where the int was found. If the int is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. * * @param region The column region in which the search will be performed. 
- * @param toFind The int to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionInt region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final int min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified int was found. If not found, returns -1. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. 
+ * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionInt region, - final int toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final int curVal = region.getInt(pivot); - final int comparison = IntComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final int max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. 
+ * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionInt region, + final long firstKey, + final long lastKey, + final int min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final int midValue = region.getInt(mid); + final boolean satisfiesMin = minInc + ? IntComparisons.geq(midValue, min) + : IntComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionInt region, + final long firstKey, + final long lastKey, + final int max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final int midValue = region.getInt(mid); + final boolean satisfiesMax = maxInc + ? IntComparisons.leq(midValue, max) + : IntComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. 
+ * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionInt region, + final long firstKey, + final long lastKey, + final int max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final int midValue = region.getInt(mid); + final boolean satisfiesMax = maxInc + ? IntComparisons.leq(midValue, max) + : IntComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given bound in a descending (non-ascending) sorted region: the highest key in the + * searched range whose value still satisfies the minimum, located by bisection. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The lower bound a value must satisfy to qualify. + * @param minInc If true, a value equal to {@code min} qualifies; if false, only values that compare greater do. + * @return The highest qualifying key, or -1 if no value in the range qualifies. + * NOTE(review): this helper and findStartIndexDescending narrow firstKey/lastKey with an unchecked (int) cast; + * confirm callers only pass keys that fit in an int. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionInt region, + final long firstKey, + final long lastKey, + final int min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final int midValue = region.getInt(mid); + final boolean satisfiesMin = minInc + ?
IntComparisons.geq(midValue, min) + : IntComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernel.java index 1427e4c16ab..98c78c1e4be 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernel.java @@ -23,8 +23,8 @@ public class LongRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -55,11 +55,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final long toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final long toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key.
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final long toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -67,79 +87,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionLong region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final long toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final long min, + final long max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && LongComparisons.eq(region.getLong(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a long within a given range. The method returns the - * row key where the long was found. If the long is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. 
* * @param region The column region in which the search will be performed. - * @param toFind The long to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionLong region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final long min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified long was found. If not found, returns -1. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionLong region, - final long toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final long curVal = region.getLong(pivot); - final int comparison = LongComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final long max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. 
+ * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionLong region, + final long firstKey, + final long lastKey, + final long min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final long midValue = region.getLong(mid); + final boolean satisfiesMin = minInc + ? LongComparisons.geq(midValue, min) + : LongComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionLong region, + final long firstKey, + final long lastKey, + final long max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final long midValue = region.getLong(mid); + final boolean satisfiesMax = maxInc + ? LongComparisons.leq(midValue, max) + : LongComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. 
+ * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionLong region, + final long firstKey, + final long lastKey, + final long max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final long midValue = region.getLong(mid); + final boolean satisfiesMax = maxInc + ? LongComparisons.leq(midValue, max) + : LongComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given bound in a descending (non-ascending) sorted region: the highest key in the + * searched range whose value still satisfies the minimum, located by bisection. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The lower bound a value must satisfy to qualify. + * @param minInc If true, a value equal to {@code min} qualifies; if false, only values that compare greater do. + * @return The highest qualifying key, or -1 if no value in the range qualifies. + * NOTE(review): this helper and findStartIndexDescending narrow firstKey/lastKey with an unchecked (int) cast; + * confirm callers only pass keys that fit in an int. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionLong region, + final long firstKey, + final long lastKey, + final long min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final long midValue = region.getLong(mid); + final boolean satisfiesMin = minInc + ?
LongComparisons.geq(midValue, min) + : LongComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernel.java index f7c642318f3..cc8bef0bdf9 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernel.java @@ -22,8 +22,8 @@ public class ObjectRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -54,11 +54,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final Object toFind : searchValues) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final Object toFind : searchValues) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key.
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final Object toFind : searchValues) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -66,79 +86,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionObject region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final Object toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final Object min, + final Object max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && ObjectComparisons.eq(region.getObject(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a Object within a given range. The method returns the - * row key where the Object was found. If the Object is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. 
* * @param region The column region in which the search will be performed. - * @param toFind The Object to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionObject region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final Object min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified Object was found. If not found, returns -1. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionObject region, - final Object toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final Object curVal = region.getObject(pivot); - final int comparison = ObjectComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final Object max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. 
+ * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionObject region, + final long firstKey, + final long lastKey, + final Object min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final Object midValue = region.getObject(mid); + final boolean satisfiesMin = minInc + ? ObjectComparisons.geq(midValue, min) + : ObjectComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionObject region, + final long firstKey, + final long lastKey, + final Object max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final Object midValue = region.getObject(mid); + final boolean satisfiesMax = maxInc + ? ObjectComparisons.leq(midValue, max) + : ObjectComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. 
+ * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionObject region, + final long firstKey, + final long lastKey, + final Object max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final Object midValue = region.getObject(mid); + final boolean satisfiesMax = maxInc + ? ObjectComparisons.leq(midValue, max) + : ObjectComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionObject region, + final long firstKey, + final long lastKey, + final Object min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final Object midValue = region.getObject(mid); + final boolean satisfiesMin = minInc + ? 
ObjectComparisons.geq(midValue, min) + : ObjectComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernel.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernel.java index 8f484bf7bc2..cf1140573c5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernel.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernel.java @@ -23,8 +23,8 @@ public class ShortRegionBinarySearchKernel { /** - * Performs a binary search on a given column region to find the positions (row keys) of specified sorted keys. The - * method returns the RowSet containing the matched row keys. + * Performs a binary search on a given column region to find the positions (row keys) of specified keys. The method + * returns the RowSet containing the matched row keys. * * @param region The column region in which the search will be performed. * @param firstKey The first key in the column region to consider for the search. @@ -55,11 +55,31 @@ public static RowSet binarySearchMatch( } final RowSetBuilderSequential builder = RowSetFactory.builderSequential(); - for (final short toFind : unboxed) { - final long lastFound = binarySearchSingle(region, builder, firstKey, lastKey, order, toFind); - if (lastFound >= 0) { - firstKey = lastFound + 1; + if (order.isAscending()) { + for (final short toFind : unboxed) { + final int start = findStartIndexAscending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + // No match for this key, move to the next key. 
+ continue; + } + final int end = findEndIndexAscending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } + } + } else { + for (final short toFind : unboxed) { + final int start = findStartIndexDescending(region, firstKey, lastKey, toFind, true); + if (start == -1) { + continue; + } + final int end = findEndIndexDescending(region, start, lastKey, toFind, true); + if (end != -1) { + builder.appendRange(start, end); + firstKey = end + 1; + } } } @@ -67,79 +87,271 @@ public static RowSet binarySearchMatch( } /** - * Find the extents of the range containing the key to find, returning the last index found. + * Performs a binary search on a given column region to find the positions (row keys) of values within a specified + * range. * - * @param builder the builder to accumulate into - * @param firstKey the key to start searching - * @param lastKey the key to end searching - * @param sortDirection the sort direction of the column - * @param toFind the element to find - * @return the last key in the found range. + * @param region The column region in which the search will be performed. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param max The maximum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. 
*/ - private static long binarySearchSingle( + public static RowSet binarySearchMinMax( @NotNull final ColumnRegionShort region, - @NotNull final RowSetBuilderSequential builder, final long firstKey, final long lastKey, - SortSpec.Order sortDirection, - final short toFind) { - // Find the beginning of the range - long matchStart = binarySearchRange(region, toFind, firstKey, lastKey, sortDirection, -1); - if (matchStart < 0) { - return -1; + @NotNull final SortColumn sortColumn, + final short min, + final short max, + final boolean minInc, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexAscending(region, offset, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + final long offset = Math.max(start, firstKey); + end = findEndIndexDescending(region, offset, lastKey, min, minInc); } - // Now we have to locate the actual start and end of the range. - long matchEnd = matchStart; - if (matchStart < lastKey && ShortComparisons.eq(region.getShort(matchStart + 1), toFind)) { - matchEnd = binarySearchRange(region, toFind, matchStart + 1, lastKey, sortDirection, 1); + // Validate that a logical range was found and the bounds didn't cross + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); } - builder.appendRange(matchStart, matchEnd); - return matchEnd; + return RowSetFactory.empty(); } /** - * Performs a binary search on a specified column region to find a short within a given range. The method returns the - * row key where the short was found. If the short is not found, it returns -1. + * Performs a binary search on a given column region to find the positions (row keys) of values greater than a + * specified minimum. 
* * @param region The column region in which the search will be performed. - * @param toFind The short to find within the column region. - * @param start The first row key in the column region to consider for the search. - * @param end The last row key in the column region to consider for the search. - * @param sortDirection An enum specifying the sorting direction of the column. - * @param rangeDirection An integer indicating the direction of the range search. Positive for forward search, - * negative for backward search. + * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param min The minimum value of the range. + * @param minInc {@code true} if the minimum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. + */ + public static RowSet binarySearchMin( + @NotNull final ColumnRegionShort region, + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final short min, + final boolean minInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = findStartIndexAscending(region, firstKey, lastKey, min, minInc); + end = Math.toIntExact(lastKey); + } else { + start = Math.toIntExact(firstKey); + end = findEndIndexDescending(region, firstKey, lastKey, min, minInc); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Performs a binary search on a given column region to find the positions (row keys) of values less than a + * specified maximum. * - * @return The row key where the specified short was found. If not found, returns -1. + * @param region The column region in which the search will be performed. 
+ * @param firstKey The first key in the column region to consider for the search. + * @param lastKey The last key in the column region to consider for the search. + * @param sortColumn A {@link SortColumn} object representing the sorting order of the column. + * @param max The maximum value of the range. + * @param maxInc {@code true} if the maximum value is inclusive, {@code false} otherwise. + * @return A {@link RowSet} containing the row keys where the values were found. */ - private static long binarySearchRange( + public static RowSet binarySearchMax( @NotNull final ColumnRegionShort region, - final short toFind, - long start, - long end, - final SortSpec.Order sortDirection, - final int rangeDirection) { - final int sortDirectionInt = sortDirection.isAscending() ? 1 : -1; - long matchStart = -1; - while (start <= end) { - long pivot = (start + end) >>> 1; - final short curVal = region.getShort(pivot); - final int comparison = ShortComparisons.compare(curVal, toFind) * sortDirectionInt; - if (comparison < 0) { - start = pivot + 1; - } else if (comparison == 0) { - matchStart = pivot; - if (rangeDirection > 0) { - start = pivot + 1; - } else { - end = pivot - 1; - } + final long firstKey, + final long lastKey, + @NotNull final SortColumn sortColumn, + final short max, + final boolean maxInc) { + + final int start; + final int end; + + if (sortColumn.isAscending()) { + start = Math.toIntExact(firstKey); + end = findEndIndexAscending(region, firstKey, lastKey, max, maxInc); + } else { + start = findStartIndexDescending(region, firstKey, lastKey, max, maxInc); + end = Math.toIntExact(lastKey); + } + + if (start != -1 && end != -1 && start <= end) { + return RowSetFactory.fromRange(start, end); + } + + return RowSetFactory.empty(); + } + + /** + * Finds the starting index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. 
+ * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexAscending( + @NotNull final ColumnRegionShort region, + final long firstKey, + final long lastKey, + final short min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final short midValue = region.getShort(mid); + final boolean satisfiesMin = minInc + ? ShortComparisons.geq(midValue, min) + : ShortComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + high = mid - 1; } else { - end = pivot - 1; + low = mid + 1; } } + return ans; + } + + /** + * Finds the ending index for a given value in an ascending (non-descending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexAscending( + @NotNull final ColumnRegionShort region, + final long firstKey, + final long lastKey, + final short max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final short midValue = region.getShort(mid); + final boolean satisfiesMax = maxInc + ? ShortComparisons.leq(midValue, max) + : ShortComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; + } - return matchStart; + /** + * Finds the starting index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. 
+ * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param max The value to find. + * @param maxInc If true, the search is inclusive of the value. + * @return The starting index, or -1 if not found. + */ + private static int findStartIndexDescending( + @NotNull final ColumnRegionShort region, + final long firstKey, + final long lastKey, + final short max, + final boolean maxInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final short midValue = region.getShort(mid); + final boolean satisfiesMax = maxInc + ? ShortComparisons.leq(midValue, max) + : ShortComparisons.lt(midValue, max); + + if (satisfiesMax) { + ans = mid; + high = mid - 1; + } else { + low = mid + 1; + } + } + return ans; + } + + /** + * Finds the ending index for a given value in a descending (non-ascending) sorted region. + * + * @param region The column region to search. + * @param firstKey The starting key of the search range. + * @param lastKey The ending key of the search range. + * @param min The value to find. + * @param minInc If true, the search is inclusive of the value. + * @return The ending index, or -1 if not found. + */ + private static int findEndIndexDescending( + @NotNull final ColumnRegionShort region, + final long firstKey, + final long lastKey, + final short min, + final boolean minInc) { + int low = (int) firstKey; + int high = (int) lastKey; + int ans = -1; + + while (low <= high) { + final int mid = low + (high - low) / 2; + final short midValue = region.getShort(mid); + final boolean satisfiesMin = minInc + ? 
ShortComparisons.geq(midValue, min) + : ShortComparisons.gt(midValue, min); + + if (satisfiesMin) { + ans = mid; + low = mid + 1; + } else { + high = mid - 1; + } + } + return ans; } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java index 49f73a7f0ec..a01efc648ea 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java @@ -2662,7 +2662,7 @@ private void testRowKeyAgnosticColumnSource( filter0.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(100_000, preFilter.numRowsProcessed()); - assertEquals(1, filter0.numRowsProcessed()); + assertTrue(filter0.numRowsProcessed() <= 1); // Constant (0 if chunk filtered, 1 if table filtered) assertEquals(100_000, postFilter.numRowsProcessed()); // All rows passed assertEquals(100_000, res0.size()); @@ -2682,7 +2682,7 @@ private void testRowKeyAgnosticColumnSource( filter1.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(100_000, preFilter.numRowsProcessed()); - assertEquals(1, filter1.numRowsProcessed()); + assertTrue(filter1.numRowsProcessed() <= 1); // Constant (0 if chunk filtered, 1 if table filtered) assertEquals(0, postFilter.numRowsProcessed()); // No rows passed assertEquals(0, res1.size()); @@ -2712,7 +2712,7 @@ public void testMergedTableSources() { filter0.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(200_000, preFilter.numRowsProcessed()); - assertEquals(100_001, filter0.numRowsProcessed()); // 100_000 from source1, 1 from source2 + assertEquals(100_000, filter0.numRowsProcessed()); // 100_000 from source1, 0 from source2 assertEquals(100_001, postFilter.numRowsProcessed()); // 1 from 
source1, 100_000 from source2 assertEquals(100_001, res0.size()); // 1 from source1, 100_000 from source2 @@ -2726,7 +2726,7 @@ public void testMergedTableSources() { filter1.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(200_000, preFilter.numRowsProcessed()); - assertEquals(100_001, filter1.numRowsProcessed()); // 100_000 from source1, 1 from source2 + assertEquals(100_000, filter1.numRowsProcessed()); // 100_000 from source1, 0 from source2 assertEquals(99_999, postFilter.numRowsProcessed()); // 99_000 from source1, 0 from source2 assertEquals(99_999, res1.size()); @@ -2754,7 +2754,7 @@ public void testMergedTableSourcesWithRenames() { filter0.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(200_000, preFilter.numRowsProcessed()); - assertEquals(100_001, filter0.numRowsProcessed()); // 100_000 from source1, 1 from source2 + assertEquals(100_000, filter0.numRowsProcessed()); // 100_000 from source1, source2 constant assertEquals(100_001, postFilter.numRowsProcessed()); // 1 from source1, 100_000 from source2 assertEquals(100_001, res0.size()); // 1 from source1, 100_000 from source2 @@ -2771,6 +2771,7 @@ public void testMergedTableSourcesWithRenames2() { .update("A = 42", "B=2", "C=3"); final RowSetCapturingFilter preFilter = new RowSetCapturingFilter(); + // NOTE: filter0 won't be tracked, will be applied as a chunk filter to constant regions. 
final RowSetCapturingFilter filter0 = new ParallelizedRowSetCapturingFilter(RawString.of("B = 42")); final RowSetCapturingFilter postFilter = new RowSetCapturingFilter(); @@ -2786,7 +2787,7 @@ public void testMergedTableSourcesWithRenames2() { TableTools.showWithRowSet(res0); assertEquals(10, preFilter.numRowsProcessed()); - assertEquals(2, filter0.numRowsProcessed()); // 1 from source1, 1 from source2 + assertEquals(0, filter0.numRowsProcessed()); assertEquals(5, postFilter.numRowsProcessed()); // 5 from source2 assertEquals(5, res0.size()); @@ -2818,7 +2819,7 @@ public void testInterestingMergedTableSources() { filter.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(47620, preFilter.numRowsProcessed()); // 33334 from source1, 14286 from source2 - assertEquals(33335, filter.numRowsProcessed()); // 33334 from source1, 1 from source2 + assertEquals(33334, filter.numRowsProcessed()); // 33334 from source1, source2 constant (not tracked) assertEquals(14287, postFilter.numRowsProcessed()); // 1 from source1, 14286 from source2 assertEquals(14287, res0.size()); // 1 from source1, 100_000 from source2 @@ -2870,8 +2871,8 @@ public void testNestedMergedTables() { filter0.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(400_000, preFilter.numRowsProcessed()); - // 100_000 from source1, 1 from source2, 100_000 from source3, 1 from source4 - assertEquals(200_002, filter0.numRowsProcessed()); + // 100_000 from source1, 100_000 from source3, source2 and source4 are constant (not tracked) + assertEquals(200_000, filter0.numRowsProcessed()); assertEquals(100_001, postFilter.numRowsProcessed()); // 1 from source1, 100_000 from source2 assertEquals(100_001, res0.size()); // 1 from source1, 100_000 from source2 @@ -2885,8 +2886,8 @@ public void testNestedMergedTables() { filter1.withRespectedBarriers("1").withDeclaredBarriers("2"), 
postFilter.withRespectedBarriers("2"))); assertEquals(400_000, preFilter.numRowsProcessed()); - // 100_000 from source1, 1 from source2, 100_000 from source3, 1 from source4 - assertEquals(200_002, filter1.numRowsProcessed()); + // 100_000 from source1, 100_000 from source3; source2 and source4 are constant (not tracked) + assertEquals(200_000, filter1.numRowsProcessed()); // 44 from source1, 100_000 from source2, 100_000 from source4 assertEquals(200044, postFilter.numRowsProcessed()); assertEquals(200044, res1.size()); @@ -2917,6 +2918,7 @@ public void testNoPushdownWrapperMergedTables() { .update("A = 2L"); // RowKeyAgnosticColumnSource final RowSetCapturingFilter preFilter = new RowSetCapturingFilter(); + // NOTE: filter0 won't be tracked, will be applied as a chunk filter to constant regions. final RowSetCapturingFilter filter0 = new ParallelizedRowSetCapturingFilter(RawString.of("A = 42")); final RowSetCapturingFilter postFilter = new RowSetCapturingFilter(); @@ -2930,7 +2932,7 @@ public void testNoPushdownWrapperMergedTables() { filter0.withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(300_000, preFilter.numRowsProcessed()); - assertEquals(200_001, filter0.numRowsProcessed()); // 100_000 source1, 100_000 source2, 1 source3 + assertEquals(200_000, filter0.numRowsProcessed()); // 100_000 source1, 100_000 source2 assertEquals(100_001, postFilter.numRowsProcessed()); // 1 source1, 100_000 source2, 0 source3 assertEquals(100_001, res0.size()); // 1 from source1, 100_000 from source2 @@ -2944,7 +2946,7 @@ public void testNoPushdownWrapperMergedTables() { RawString.of("A != 42").withRespectedBarriers("1").withDeclaredBarriers("2"), postFilter.withRespectedBarriers("2"))); assertEquals(300_000, preFilter.numRowsProcessed()); - assertEquals(200_001, filter0.numRowsProcessed()); // 100_000 source1, 1 source2, 100_000 source3 + assertEquals(200_000, filter0.numRowsProcessed()); // 100_000 source1, 0 source2,
100_000 source3 assertEquals(199_999, postFilter.numRowsProcessed()); // 99_999 source1, 0 source2, 100_000 source3 assertEquals(199_999, res1.size()); diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestPartitionAwareSourceTableNoMocks.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestPartitionAwareSourceTableNoMocks.java index 856072db817..b4305d71d43 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestPartitionAwareSourceTableNoMocks.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestPartitionAwareSourceTableNoMocks.java @@ -421,8 +421,9 @@ public void testDeferredPartitioningFilterSplitsBarrier() { Assert.eq(filter0.numRowsProcessed(), "filter0.numRowsProcessed()", 2 * partitionSize); // ensure we see the barrier partition filter as filtering only the partitioned rows Assert.eq(filter1.numRowsProcessed(), "filter1.numRowsProcessed()", 4); - // the respects barrier could not be lifted but operates on constant column regions (vs. rows) - Assert.eq(filter2.numRowsProcessed(), "filter2.numRowsProcessed()", 2); + // the respects barrier could not be lifted but operates on constant column regions (vs. rows). Since + // the filter is performed as a chunk filter, no rows are actually processed by the filter.
+ Assert.eq(filter2.numRowsProcessed(), "filter2.numRowsProcessed()", 0); Assert.eq(res0.size(), "res0.size()", partitionSize / 2); } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernelTest.java index c5e3126ce7e..084bdc4af62 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ByteRegionBinarySearchKernelTest.java @@ -12,13 +12,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionByte; import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionByte; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; -import io.deephaven.util.QueryConstants; import io.deephaven.util.compare.ByteComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -31,6 +31,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_BYTE; @Category(ParallelTest.class) public class ByteRegionBinarySearchKernelTest { @@ -41,16 +42,36 @@ public class ByteRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((byte) rnd.nextInt()); + } + 
data.sort(ByteComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final byte value = (byte) rnd.nextInt(); + if (value == NULL_BYTE + || Collections.binarySearch(sortedData, value, ByteComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + private void randomizedTestRunner( int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((byte) rnd.nextInt()); - } - origData.sort(ByteComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -62,39 +83,52 @@ private void randomizedTestRunner( final byte value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = ByteRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. 
+ try (final RowSet matchesFound = ByteRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Byte[] {value})) { + new Byte[] {value}); + final RowSet minMaxFound = ByteRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true)) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final byte value = (byte) rnd.nextInt(); - if (value == QueryConstants.NULL_BYTE - || Collections.binarySearch(origData, value, ByteComparisons::compare) >= 0) { - --ii; - ++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Byte missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = ByteRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Byte[] {value})) { + new Byte[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = ByteRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { Assert.assertTrue(valuesFound.isEmpty()); } } @@ -222,6 +256,264 @@ public void testInvertedRowIsRange() { } } + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { 
+ final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final byte maxValue = (byte) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final byte minValue = (byte) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final byte minCandidate = (byte) rnd.nextInt(); + final byte maxCandidate = (byte) rnd.nextInt(); + + final 
byte minValue = (byte) Math.min(minCandidate, maxCandidate); + final byte maxValue = (byte) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final byte minValue, + final boolean minInclusive, + final byte maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionByte region = makeColumnRegionByte(dataToUse); + + try (final RowSet result = ByteRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final byte value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(ByteComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(ByteComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(ByteComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(ByteComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final byte minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionByte region = makeColumnRegionByte(dataToUse); + + try (final RowSet result = ByteRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final byte value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(ByteComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(ByteComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final byte maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionByte region = makeColumnRegionByte(dataToUse); + + try (final RowSet result = ByteRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final byte value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(ByteComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(ByteComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionByte makeColumnRegionByte(@NotNull final List values) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernelTest.java index ec2e9a37aa9..45fd155b6f6 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernelTest.java @@ -8,13 +8,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionChar; import 
io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionChar; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; -import io.deephaven.util.QueryConstants; import io.deephaven.util.compare.CharComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -27,6 +27,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_CHAR; @Category(ParallelTest.class) public class CharRegionBinarySearchKernelTest { @@ -37,16 +38,36 @@ public class CharRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((char) rnd.nextInt()); + } + data.sort(CharComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final char value = (char) rnd.nextInt(); + if (value == NULL_CHAR + || Collections.binarySearch(sortedData, value, CharComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + private void randomizedTestRunner( int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((char) rnd.nextInt()); - } - origData.sort(CharComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = 
makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -58,39 +79,52 @@ private void randomizedTestRunner( final char value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = CharRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. + try (final RowSet matchesFound = CharRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Character[] {value})) { + new Character[] {value}); + final RowSet minMaxFound = CharRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true)) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final char value = (char) rnd.nextInt(); - if (value == QueryConstants.NULL_CHAR - || Collections.binarySearch(origData, value, CharComparisons::compare) >= 0) { - --ii; - ++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Character missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = CharRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new 
Character[] {value})) { + new Character[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = CharRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { Assert.assertTrue(valuesFound.isEmpty()); } } @@ -218,6 +252,264 @@ public void testInvertedRowIsRange() { } } + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final char maxValue = (char) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final char minValue = (char) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + 
minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final char minCandidate = (char) rnd.nextInt(); + final char maxCandidate = (char) rnd.nextInt(); + + final char minValue = (char) Math.min(minCandidate, maxCandidate); + final char maxValue = (char) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final char minValue, + final boolean minInclusive, + final char maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse 
= new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionChar region = makeColumnRegionChar(dataToUse); + + try (final RowSet result = CharRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final char value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(CharComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(CharComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(CharComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(CharComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final char minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionChar region = makeColumnRegionChar(dataToUse); + + try (final RowSet result = CharRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final char value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(CharComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(CharComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final char maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionChar region = makeColumnRegionChar(dataToUse); + + try (final RowSet result = CharRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final char value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(CharComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(CharComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionChar makeColumnRegionChar(@NotNull final List values) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernelTest.java index 1043181ec44..534f91e2a27 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/DoubleRegionBinarySearchKernelTest.java @@ -2,7 +2,7 @@ // Copyright (c) 2016-2026 Deephaven Data Labs and Patent Pending // // ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit CharRegionBinarySearchKernelTest and run "./gradlew replicateRegionAndRegionedSourceTests" to regenerate +// ****** Edit FloatRegionBinarySearchKernelTest and run "./gradlew replicateRegionAndRegionedSourceTests" to regenerate // // @formatter:off package io.deephaven.engine.table.impl.sources.regioned.kernel; @@ -12,13 +12,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionDouble; import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionDouble; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; -import io.deephaven.util.QueryConstants; import 
io.deephaven.util.compare.DoubleComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -31,6 +31,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_DOUBLE; @Category(ParallelTest.class) public class DoubleRegionBinarySearchKernelTest { @@ -41,16 +42,36 @@ public class DoubleRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((double) rnd.nextInt()); + } + data.sort(DoubleComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final double value = (double) rnd.nextInt(); + if (value == NULL_DOUBLE + || Collections.binarySearch(sortedData, value, DoubleComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + private void randomizedTestRunner( int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((double) rnd.nextInt()); - } - origData.sort(DoubleComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -62,39 +83,52 @@ private void randomizedTestRunner( final double value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, 
lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = DoubleRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. + try (final RowSet matchesFound = DoubleRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Double[] {value})) { + new Double[] {value}); + final RowSet minMaxFound = DoubleRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true);) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final double value = (double) rnd.nextInt(); - if (value == QueryConstants.NULL_DOUBLE - || Collections.binarySearch(origData, value, DoubleComparisons::compare) >= 0) { - --ii; - ++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Double missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = DoubleRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Double[] {value})) { + new Double[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = DoubleRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { 
Assert.assertTrue(valuesFound.isEmpty()); } } @@ -222,6 +256,374 @@ public void testInvertedRowIsRange() { } } + /** + * Match searches for special double values (NULL_DOUBLE, negative infinity, positive infinity, NaN, and -0.0f) should + * return all rows with that value, and only rows with that value, even NaN (due to the definition of "match") + */ + @Test + public void testMatchSpecialDoubles() { + final List specialValues = List.of(NULL_DOUBLE, Double.NEGATIVE_INFINITY, (double)-0.0, Double.POSITIVE_INFINITY, Double.NaN); + + // This list will have 2 of each special value. + final List sortedData = new ArrayList<>(specialValues); + sortedData.addAll(specialValues); + Collections.sort(sortedData, DoubleComparisons::compare); + + final SortColumn sortColumnAsc = SortColumn.asc(ColumnName.of("test")); + final ColumnRegionDouble regionAsc = makeColumnRegionDouble(sortedData); + + for (double f : specialValues) { + try (final RowSet matches = DoubleRegionBinarySearchKernel.binarySearchMatch( + regionAsc, + 0, sortedData.size() - 1, + sortColumnAsc, + new Double[] {f})) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final double value = sortedData.get((int) rowKey); + Assert.assertTrue(DoubleComparisons.eq(value, f)); + }); + } + } + + // Repeat the same test for descending order. 
+ final List sortedDataDesc = new ArrayList<>(sortedData); + Collections.reverse(sortedDataDesc); + final SortColumn sortColumnDesc = SortColumn.desc(ColumnName.of("test")); + final ColumnRegionDouble regionDesc = makeColumnRegionDouble(sortedDataDesc); + + for (double f : specialValues) { + try (final RowSet matches = DoubleRegionBinarySearchKernel.binarySearchMatch( + regionDesc, + 0, sortedData.size() - 1, + sortColumnDesc, + new Double[] {f})) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final double value = sortedDataDesc.get((int) rowKey); + Assert.assertTrue(DoubleComparisons.eq(value, f)); + }); + } + } + } + + /** + * Range searches for special double values (NULL_DOUBLE, negative infinity, positive infinity, NaN, and -0.0f) should + * return all rows with that value, and only rows with that value. + * + * Note that NaN is not special in this case. When greater than RangeFilter for double are created, the upper + * bound is set to NaN (exclusive) and the DoubleRegionBinarySearchKernel will honor the exclusive upper bound + * and exclude NaN values from the results. + */ + @Test + public void testMinMaxSpecialDoubles() { + final List specialValues = List.of(NULL_DOUBLE, Double.NEGATIVE_INFINITY, (double)-0.0, Double.POSITIVE_INFINITY, Double.NaN); + + // This list will have 2 of each special value. 
+ final List sortedData = new ArrayList<>(specialValues); + sortedData.addAll(specialValues); + Collections.sort(sortedData, DoubleComparisons::compare); + + final SortColumn sortColumnAsc = SortColumn.asc(ColumnName.of("test")); + final ColumnRegionDouble regionAsc = makeColumnRegionDouble(sortedData); + + for (double f : specialValues) { + try (final RowSet matches = DoubleRegionBinarySearchKernel.binarySearchMinMax( + regionAsc, + 0, sortedData.size() - 1, + sortColumnAsc, + f, f, true, true)) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final double value = sortedData.get((int) rowKey); + Assert.assertTrue(DoubleComparisons.eq(value, f)); + }); + } + } + + // Repeat the same test for descending order. + final List sortedDataDesc = new ArrayList<>(sortedData); + Collections.reverse(sortedDataDesc); + final SortColumn sortColumnDesc = SortColumn.desc(ColumnName.of("test")); + final ColumnRegionDouble regionDesc = makeColumnRegionDouble(sortedDataDesc); + + for (double f : specialValues) { + try (final RowSet matches = DoubleRegionBinarySearchKernel.binarySearchMinMax( + regionDesc, + 0, sortedDataDesc.size() - 1, + sortColumnDesc, + f, f, true, true)) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final double value = sortedDataDesc.get((int) rowKey); + Assert.assertTrue(DoubleComparisons.eq(value, f)); + }); + } + } + } + + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final double maxValue = (double) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, 
lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final double minValue = (double) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final double minCandidate = (double) rnd.nextInt(); + final double maxCandidate = (double) rnd.nextInt(); + + final double minValue = (double) Math.min(minCandidate, maxCandidate); + final double maxValue = (double) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of 
inverted/inclusive/exclusive min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final double minValue, + final boolean minInclusive, + final double maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionDouble region = makeColumnRegionDouble(dataToUse); + + try (final RowSet result = DoubleRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final double value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(DoubleComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(DoubleComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(DoubleComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(DoubleComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final double minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionDouble region = makeColumnRegionDouble(dataToUse); + + try (final RowSet result = DoubleRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final double value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(DoubleComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(DoubleComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final double maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionDouble region = makeColumnRegionDouble(dataToUse); + + try (final RowSet result = DoubleRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final double value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(DoubleComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(DoubleComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionDouble makeColumnRegionDouble(@NotNull final List values) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernelTest.java index 7baa76f5299..fc21272d3a4 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernelTest.java @@ -1,9 +1,6 @@ // // Copyright (c) 2016-2026 Deephaven Data Labs and Patent Pending // -// ****** AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY -// ****** Edit CharRegionBinarySearchKernelTest and run "./gradlew replicateRegionAndRegionedSourceTests" to regenerate -// 
// @formatter:off package io.deephaven.engine.table.impl.sources.regioned.kernel; @@ -12,13 +9,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionFloat; import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionFloat; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; -import io.deephaven.util.QueryConstants; import io.deephaven.util.compare.FloatComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -31,6 +28,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_FLOAT; @Category(ParallelTest.class) public class FloatRegionBinarySearchKernelTest { @@ -41,16 +39,36 @@ public class FloatRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((float) rnd.nextInt()); + } + data.sort(FloatComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final float value = (float) rnd.nextInt(); + if (value == NULL_FLOAT + || Collections.binarySearch(sortedData, value, FloatComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + private void randomizedTestRunner( int size, 
int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((float) rnd.nextInt()); - } - origData.sort(FloatComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -62,39 +80,52 @@ private void randomizedTestRunner( final float value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = FloatRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. + try (final RowSet matchesFound = FloatRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Float[] {value})) { + new Float[] {value}); + final RowSet minMaxFound = FloatRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true);) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final float value = (float) rnd.nextInt(); - if (value == QueryConstants.NULL_FLOAT - || Collections.binarySearch(origData, value, FloatComparisons::compare) >= 0) { - --ii; - 
++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Float missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = FloatRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Float[] {value})) { + new Float[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = FloatRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { Assert.assertTrue(valuesFound.isEmpty()); } } @@ -222,6 +253,374 @@ public void testInvertedRowIsRange() { } } + /** + * Match searches for special float values (NULL_FLOAT, negative infinity, positive infinity, NaN, and -0.0f) should + * return all rows with that value, and only rows with that value, even NaN (due to the definition of "match") + */ + @Test + public void testMatchSpecialFloats() { + final List specialValues = List.of(NULL_FLOAT, Float.NEGATIVE_INFINITY, (float)-0.0, Float.POSITIVE_INFINITY, Float.NaN); + + // This list will have 2 of each special value. + final List sortedData = new ArrayList<>(specialValues); + sortedData.addAll(specialValues); + Collections.sort(sortedData, FloatComparisons::compare); + + final SortColumn sortColumnAsc = SortColumn.asc(ColumnName.of("test")); + final ColumnRegionFloat regionAsc = makeColumnRegionFloat(sortedData); + + for (float f : specialValues) { + try (final RowSet matches = FloatRegionBinarySearchKernel.binarySearchMatch( + regionAsc, + 0, sortedData.size() - 1, + sortColumnAsc, + new Float[] {f})) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final float value = sortedData.get((int) rowKey); + Assert.assertTrue(FloatComparisons.eq(value, f)); + }); + } + } + + // Repeat the same test for descending order. 
+ final List sortedDataDesc = new ArrayList<>(sortedData); + Collections.reverse(sortedDataDesc); + final SortColumn sortColumnDesc = SortColumn.desc(ColumnName.of("test")); + final ColumnRegionFloat regionDesc = makeColumnRegionFloat(sortedDataDesc); + + for (float f : specialValues) { + try (final RowSet matches = FloatRegionBinarySearchKernel.binarySearchMatch( + regionDesc, + 0, sortedData.size() - 1, + sortColumnDesc, + new Float[] {f})) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final float value = sortedDataDesc.get((int) rowKey); + Assert.assertTrue(FloatComparisons.eq(value, f)); + }); + } + } + } + + /** + * Range searches for special float values (NULL_FLOAT, negative infinity, positive infinity, NaN, and -0.0f) should + * return all rows with that value, and only rows with that value. + * + * Note that NaN is not special in this case. When greater than RangeFilter for float are created, the upper + * bound is set to NaN (exclusive) and the FloatRegionBinarySearchKernel will honor the exclusive upper bound + * and exclude NaN values from the results. + */ + @Test + public void testMinMaxSpecialFloats() { + final List specialValues = List.of(NULL_FLOAT, Float.NEGATIVE_INFINITY, (float)-0.0, Float.POSITIVE_INFINITY, Float.NaN); + + // This list will have 2 of each special value. 
+ final List sortedData = new ArrayList<>(specialValues); + sortedData.addAll(specialValues); + Collections.sort(sortedData, FloatComparisons::compare); + + final SortColumn sortColumnAsc = SortColumn.asc(ColumnName.of("test")); + final ColumnRegionFloat regionAsc = makeColumnRegionFloat(sortedData); + + for (float f : specialValues) { + try (final RowSet matches = FloatRegionBinarySearchKernel.binarySearchMinMax( + regionAsc, + 0, sortedData.size() - 1, + sortColumnAsc, + f, f, true, true)) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final float value = sortedData.get((int) rowKey); + Assert.assertTrue(FloatComparisons.eq(value, f)); + }); + } + } + + // Repeat the same test for descending order. + final List sortedDataDesc = new ArrayList<>(sortedData); + Collections.reverse(sortedDataDesc); + final SortColumn sortColumnDesc = SortColumn.desc(ColumnName.of("test")); + final ColumnRegionFloat regionDesc = makeColumnRegionFloat(sortedDataDesc); + + for (float f : specialValues) { + try (final RowSet matches = FloatRegionBinarySearchKernel.binarySearchMinMax( + regionDesc, + 0, sortedDataDesc.size() - 1, + sortColumnDesc, + f, f, true, true)) { + + Assert.assertEquals(2, matches.size()); + matches.forAllRowKeys(rowKey -> { + final float value = sortedDataDesc.get((int) rowKey); + Assert.assertTrue(FloatComparisons.eq(value, f)); + }); + } + } + } + + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final float maxValue = (float) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); 
+ + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final float minValue = (float) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final float minCandidate = (float) rnd.nextInt(); + final float maxCandidate = (float) rnd.nextInt(); + + final float minValue = (float) Math.min(minCandidate, maxCandidate); + final float maxValue = (float) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive 
min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final float minValue, + final boolean minInclusive, + final float maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionFloat region = makeColumnRegionFloat(dataToUse); + + try (final RowSet result = FloatRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final float value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(FloatComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(FloatComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(FloatComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(FloatComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final float minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionFloat region = makeColumnRegionFloat(dataToUse); + + try (final RowSet result = FloatRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final float value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(FloatComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(FloatComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final float maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionFloat region = makeColumnRegionFloat(dataToUse); + + try (final RowSet result = FloatRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final float value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(FloatComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(FloatComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionFloat makeColumnRegionFloat(@NotNull final List values) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernelTest.java index f337f48a410..5b5fecb596a 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/IntRegionBinarySearchKernelTest.java @@ -12,13 +12,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionInt; import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionInt; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; 
-import io.deephaven.util.QueryConstants; import io.deephaven.util.compare.IntComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -31,6 +31,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_INT; @Category(ParallelTest.class) public class IntRegionBinarySearchKernelTest { @@ -41,16 +42,36 @@ public class IntRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((int) rnd.nextInt()); + } + data.sort(IntComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final int value = (int) rnd.nextInt(); + if (value == NULL_INT + || Collections.binarySearch(sortedData, value, IntComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + private void randomizedTestRunner( int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((int) rnd.nextInt()); - } - origData.sort(IntComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -62,39 +83,52 @@ private void randomizedTestRunner( final int value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, 
lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = IntRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. + try (final RowSet matchesFound = IntRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Integer[] {value})) { + new Integer[] {value}); + final RowSet minMaxFound = IntRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true)) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final int value = (int) rnd.nextInt(); - if (value == QueryConstants.NULL_INT - || Collections.binarySearch(origData, value, IntComparisons::compare) >= 0) { - --ii; - ++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Integer missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = IntRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Integer[] {value})) { + new Integer[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = IntRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { Assert.assertTrue(valuesFound.isEmpty()); } } @@ 
-222,6 +256,264 @@ public void testInvertedRowIsRange() { } } + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final int maxValue = (int) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final int minValue = (int) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; 
step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final int minCandidate = (int) rnd.nextInt(); + final int maxCandidate = (int) rnd.nextInt(); + + final int minValue = (int) Math.min(minCandidate, maxCandidate); + final int maxValue = (int) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final int minValue, + final boolean minInclusive, + final int maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionInt region = makeColumnRegionInt(dataToUse); + + try (final RowSet result = IntRegionBinarySearchKernel.binarySearchMinMax( + region, 
firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final int value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(IntComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(IntComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(IntComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(IntComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final int minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionInt region = makeColumnRegionInt(dataToUse); + + try (final RowSet result = IntRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final int value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(IntComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(IntComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final int maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionInt region = makeColumnRegionInt(dataToUse); + + try (final RowSet result = IntRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final int value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(IntComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(IntComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionInt makeColumnRegionInt(@NotNull final List values) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernelTest.java index 1f06448a7a9..37af0131ff6 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernelTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/LongRegionBinarySearchKernelTest.java @@ -12,13 +12,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionLong; import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionLong; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; -import io.deephaven.util.QueryConstants; import io.deephaven.util.compare.LongComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -31,6 +31,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_LONG; @Category(ParallelTest.class) public class LongRegionBinarySearchKernelTest { @@ -41,16 +42,36 @@ public class LongRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new 
EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((long) rnd.nextInt()); + } + data.sort(LongComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final long value = (long) rnd.nextInt(); + if (value == NULL_LONG + || Collections.binarySearch(sortedData, value, LongComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + private void randomizedTestRunner( int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((long) rnd.nextInt()); - } - origData.sort(LongComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -62,39 +83,52 @@ private void randomizedTestRunner( final long value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = LongRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. 
+ try (final RowSet matchesFound = LongRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Long[] {value})) { + new Long[] {value}); + final RowSet minMaxFound = LongRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true)) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final long value = (long) rnd.nextInt(); - if (value == QueryConstants.NULL_LONG - || Collections.binarySearch(origData, value, LongComparisons::compare) >= 0) { - --ii; - ++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Long missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = LongRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Long[] {value})) { + new Long[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = LongRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { Assert.assertTrue(valuesFound.isEmpty()); } } @@ -222,6 +256,264 @@ public void testInvertedRowIsRange() { } } + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { 
+ final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final long maxValue = (long) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final long minValue = (long) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final long minCandidate = (long) rnd.nextInt(); + final long maxCandidate = (long) rnd.nextInt(); + + final 
long minValue = (long) Math.min(minCandidate, maxCandidate); + final long maxValue = (long) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final long minValue, + final boolean minInclusive, + final long maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionLong region = makeColumnRegionLong(dataToUse); + + try (final RowSet result = LongRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final long value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(LongComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(LongComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(LongComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(LongComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final long minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionLong region = makeColumnRegionLong(dataToUse); + + try (final RowSet result = LongRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final long value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(LongComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(LongComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final long maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionLong region = makeColumnRegionLong(dataToUse); + + try (final RowSet result = LongRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. 
+ if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final long value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(LongComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(LongComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionLong makeColumnRegionLong(@NotNull final List values) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernelTest.java new file mode 100644 index 00000000000..bfa8d3ce3f7 --- /dev/null +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ObjectRegionBinarySearchKernelTest.java @@ -0,0 +1,553 @@ +// +// Copyright (c) 2016-2026 Deephaven Data Labs and Patent Pending +// +package io.deephaven.engine.table.impl.sources.regioned.kernel; + +import io.deephaven.api.ColumnName; +import io.deephaven.api.SortColumn; +import io.deephaven.chunk.WritableChunk; +import io.deephaven.chunk.attributes.Values; +import io.deephaven.engine.rowset.RowSet; +import 
io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionObject; +import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; +import io.deephaven.engine.testutil.junit4.EngineCleanup; +import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionObject; +import io.deephaven.generic.region.AppendOnlyRegionAccessor; +import io.deephaven.test.types.ParallelTest; +import org.jetbrains.annotations.NotNull; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Random; +import java.util.function.IntToLongFunction; + +@Category(ParallelTest.class) +public class ObjectRegionBinarySearchKernelTest { + private static final int[] SIZES = {10, 100, 1000000}; + private static final int MAX_FAILED_LOOKUPS = 1000; + private static final int NUM_NEGATIVE_LOOKUPS = 100; + + @Rule + public final EngineCleanup framework = new EngineCleanup(); + + private static int STRING_LENGTH = 3; + private static final char[] ALPHA_NUMERIC_CHARS = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789".toCharArray(); + + private static String makeString(Random random, int length) { + if (length <= 0) { + return ""; + } + + char[] buffer = new char[length]; + + for (int i = 0; i < length; i++) { + // Fetch a random character directly from the static array + buffer[i] = ALPHA_NUMERIC_CHARS[random.nextInt(ALPHA_NUMERIC_CHARS.length)]; + } + + // A single String allocation at the very end + return new String(buffer); + } + + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add(makeString(rnd, STRING_LENGTH)); + } + data.sort(Comparator.naturalOrder()); + return data; + } + + private static List findAbsentValues(List 
sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final String value = makeString(rnd, STRING_LENGTH); + if (value == null + || Collections.binarySearch(sortedData, value, Comparator.naturalOrder()) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + + private void randomizedTestRunner( + int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { + + final Random rnd = new Random(seed); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); + if (inverted) { + java.util.Collections.reverse(data); + } + final ColumnRegionObject region = makeColumnRegionObject(data); + ColumnName columnName = ColumnName.of("test"); + final SortColumn sortColumn = inverted ? SortColumn.desc(columnName) : SortColumn.asc(columnName); + + for (int ii = 0; ii < size; ++ii) { + final String value = data.get(ii); + final long startRow = Math.max(0, firstKey.applyAsLong(ii)); + final long endRow = Math.min(size - 1, lastKey.applyAsLong(ii)); + // Test match search and min/max search give the same results for this value. 
+ try (final RowSet matchesFound = ObjectRegionBinarySearchKernel.binarySearchMatch( + region, + startRow, endRow, + sortColumn, + new String[] {value}); + final RowSet minMaxFound = ObjectRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true);) { + if (startRow <= ii && ii <= endRow) { + Assert.assertTrue("Expected to find " + value + " at index " + ii, + matchesFound.containsRange(ii, ii)); + } else { + Assert.assertFalse("Index should not be populated.", + matchesFound.containsRange(ii, ii)); + } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); + } + + } + + // Test negative lookups + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (String missingValue : missingValues) { + final long startRow = 0; + final long endRow = size - 1; + try (final RowSet valuesFound = ObjectRegionBinarySearchKernel.binarySearchMatch( + region, + startRow, endRow, + sortColumn, + new String[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = ObjectRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { + Assert.assertTrue(valuesFound.isEmpty()); + } + } + } + + private void randomizedTestRunner( + int size, int seed, IntToLongFunction firstKey, IntToLongFunction lastKey) { + randomizedTestRunner(size, seed, false, firstKey, lastKey); + } + + private void invertedRandomizedTestRunner( + int size, int seed, IntToLongFunction firstKey, IntToLongFunction lastKey) { + randomizedTestRunner(size, seed, true, firstKey, lastKey); + } + + @Test + public void testRandomizedDataFullRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> 0, i -> size); + } + } + + @Test + public void testRowIsAboveRange() { + for (int size : SIZES) { + 
randomizedTestRunner(size, 0, i -> 0, i -> i - 1); + } + } + + @Test + public void testRowUpperBoundRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> 0, i -> i); + } + } + + @Test + public void testRowInLowerRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> 0, i -> i + 1); + } + } + + @Test + public void testRowIsBelowRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> i + 1, i -> size); + } + } + + @Test + public void testRowLowerBoundRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> i, i -> size); + } + } + + @Test + public void testRowInUpperRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> i - 1, i -> size); + } + } + + @Test + public void testRowIsRange() { + for (int size : SIZES) { + randomizedTestRunner(size, 0, i -> i, i -> i); + } + } + + @Test + public void testInvertedRandomizedDataFullRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> 0, i -> size); + } + } + + @Test + public void testInvertedRowIsAboveRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> 0, i -> i - 1); + } + } + + @Test + public void testInvertedRowUpperBoundRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> 0, i -> i); + } + } + + @Test + public void testInvertedRowInLowerRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> 0, i -> i + 1); + } + } + + @Test + public void testInvertedRowIsBelowRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> i + 1, i -> size); + } + } + + @Test + public void testInvertedRowLowerBoundRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> i, i -> size); + } + } + + @Test + public void testInvertedRowInUpperRange() { + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> i - 1, i -> size); + } + } + + @Test + public void testInvertedRowIsRange() 
{ + for (int size : SIZES) { + invertedRandomizedTestRunner(size, 0, i -> i, i -> i); + } + } + + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final String maxValue = makeString(rnd, STRING_LENGTH); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final String minValue = makeString(rnd, STRING_LENGTH); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final 
List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final String minCandidate = makeString(rnd, STRING_LENGTH); + final String maxCandidate = makeString(rnd, STRING_LENGTH); + + final String minValue = Collections.min(List.of(minCandidate, maxCandidate)); + final String maxValue = Collections.max(List.of(minCandidate, maxCandidate)); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive min/max. + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final String maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionObject region = makeColumnRegionObject(dataToUse); + + try (final RowSet 
result = ObjectRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final String value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(Comparator.naturalOrder().compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(Comparator.naturalOrder().compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final String minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionObject region = makeColumnRegionObject(dataToUse); + + try (final RowSet result = ObjectRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final String value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(Comparator.naturalOrder().compare(value, minValue) >= 0); + } else { + Assert.assertTrue(Comparator.naturalOrder().compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. 
+ try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final String minValue, + final boolean minInclusive, + final String maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionObject region = makeColumnRegionObject(dataToUse); + + try (final RowSet result = ObjectRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. + result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. 
+ final String value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(Comparator.naturalOrder().compare(value, minValue) >= 0); + } else { + Assert.assertTrue(Comparator.naturalOrder().compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(Comparator.naturalOrder().compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(Comparator.naturalOrder().compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private static final int PAGE_SIZE = 1 << 16; + + private static ColumnRegionObject makeColumnRegionObject(@NotNull final List values) { + return new AppendOnlyFixedSizePageRegionObject<>( + RegionedColumnSource.ROW_KEY_TO_SUB_REGION_ROW_INDEX_MASK, PAGE_SIZE, new AppendOnlyRegionAccessor<>() { + @Override + public void readChunkPage(long firstRowPosition, int minimumSize, + @NotNull WritableChunk destination) { + int finalSize = (int) Math.min(minimumSize, values.size() - firstRowPosition); + destination.setSize(finalSize); + for (int ii = 0; ii < finalSize; ++ii) { + destination.asWritableObjectChunk().set(ii, values.get((int) firstRowPosition + ii)); + } + } + + @Override + public long size() { + return values.size(); + } + }); + } +} diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernelTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernelTest.java index 65291d1f8bb..09bada3debf 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernelTest.java +++ 
b/engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/ShortRegionBinarySearchKernelTest.java @@ -12,13 +12,13 @@ import io.deephaven.chunk.WritableChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionShort; import io.deephaven.engine.table.impl.sources.regioned.RegionedColumnSource; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.generic.region.AppendOnlyFixedSizePageRegionShort; import io.deephaven.generic.region.AppendOnlyRegionAccessor; import io.deephaven.test.types.ParallelTest; -import io.deephaven.util.QueryConstants; import io.deephaven.util.compare.ShortComparisons; import org.jetbrains.annotations.NotNull; import org.junit.Assert; @@ -31,6 +31,7 @@ import java.util.List; import java.util.Random; import java.util.function.IntToLongFunction; +import static io.deephaven.util.QueryConstants.NULL_SHORT; @Category(ParallelTest.class) public class ShortRegionBinarySearchKernelTest { @@ -41,16 +42,36 @@ public class ShortRegionBinarySearchKernelTest { @Rule public final EngineCleanup framework = new EngineCleanup(); + private static List makeSortedData(int size, Random rnd) { + final List data = new ArrayList<>(size); + for (int ii = 0; ii < size; ++ii) { + data.add((short) rnd.nextInt()); + } + data.sort(ShortComparisons::compare); + return data; + } + + private static List findAbsentValues(List sortedData, int num, int maxFailures, Random rnd) { + final List missingValues = new ArrayList<>(); + int numFailedLookups = 0; + while (missingValues.size() < num && numFailedLookups < maxFailures) { + final short value = (short) rnd.nextInt(); + if (value == NULL_SHORT + || Collections.binarySearch(sortedData, value, ShortComparisons::compare) >= 0) { + numFailedLookups++; + continue; + } + missingValues.add(value); + } + return missingValues; + } + 
private void randomizedTestRunner( int size, int seed, boolean inverted, IntToLongFunction firstKey, IntToLongFunction lastKey) { final Random rnd = new Random(seed); - final List origData = new ArrayList<>(size); - for (int ii = 0; ii < size; ++ii) { - origData.add((short) rnd.nextInt()); - } - origData.sort(ShortComparisons::compare); - final List data = new ArrayList<>(origData); + final List sortedData = makeSortedData(size, rnd); + final List data = new ArrayList<>(sortedData); if (inverted) { java.util.Collections.reverse(data); } @@ -62,39 +83,52 @@ private void randomizedTestRunner( final short value = data.get(ii); final long startRow = Math.max(0, firstKey.applyAsLong(ii)); final long endRow = Math.min(size - 1, lastKey.applyAsLong(ii)); - try (final RowSet valuesFound = ShortRegionBinarySearchKernel.binarySearchMatch( + // Test match search and min/max search give the same results for this value. + try (final RowSet matchesFound = ShortRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Short[] {value})) { + new Short[] {value}); + final RowSet minMaxFound = ShortRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + value, + value, true, + true)) { if (startRow <= ii && ii <= endRow) { Assert.assertTrue("Expected to find " + value + " at index " + ii, - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } else { Assert.assertFalse("Index should not be populated.", - valuesFound.containsRange(ii, ii)); + matchesFound.containsRange(ii, ii)); } + Assert.assertEquals("binarySearchMatch and binarySearchMinMax should return the same results.", + matchesFound, minMaxFound); } + } // Test negative lookups - int numFailedLookups = 0; - for (int ii = 0; ii < NUM_NEGATIVE_LOOKUPS && numFailedLookups < MAX_FAILED_LOOKUPS; ++ii) { - final short value = (short) rnd.nextInt(); - if (value == QueryConstants.NULL_SHORT - || Collections.binarySearch(origData, value, 
ShortComparisons::compare) >= 0) { - --ii; - ++numFailedLookups; - continue; - } - + final List missingValues = + findAbsentValues(sortedData, NUM_NEGATIVE_LOOKUPS, MAX_FAILED_LOOKUPS, rnd); + for (Short missingValue : missingValues) { final long startRow = 0; final long endRow = size - 1; try (final RowSet valuesFound = ShortRegionBinarySearchKernel.binarySearchMatch( region, startRow, endRow, sortColumn, - new Short[] {value})) { + new Short[] {missingValue})) { + Assert.assertTrue(valuesFound.isEmpty()); + } + try (final RowSet valuesFound = ShortRegionBinarySearchKernel.binarySearchMinMax( + region, + startRow, endRow, + sortColumn, + missingValue, + missingValue, false, + false)) { Assert.assertTrue(valuesFound.isEmpty()); } } @@ -222,6 +256,264 @@ public void testInvertedRowIsRange() { } } + @Test + public void testBinSearchMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final short maxValue = (short) rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + maxTestRunner(data, false, firstKey, lastKey, maxValue, true); + maxTestRunner(data, false, firstKey, lastKey, maxValue, false); + + maxTestRunner(data, true, firstKey, lastKey, maxValue, true); + maxTestRunner(data, true, firstKey, lastKey, maxValue, false); + } + } + } + + @Test + public void testBinSearchMinRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final short minValue = (short) 
rnd.nextInt(); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + minTestRunner(data, false, firstKey, lastKey, minValue, true); + minTestRunner(data, false, firstKey, lastKey, minValue, false); + + minTestRunner(data, true, firstKey, lastKey, minValue, true); + minTestRunner(data, true, firstKey, lastKey, minValue, false); + } + } + } + + @Test + public void testBinSearchMinMaxRandom() { + final Random rnd = new Random(0); + + final int steps = 20; + for (int size : SIZES) { + final List data = makeSortedData(size, rnd); + + for (int step = 0; step < steps; ++step) { + System.out.println("Size = " + size + ", step = " + step); + final short minCandidate = (short) rnd.nextInt(); + final short maxCandidate = (short) rnd.nextInt(); + + final short minValue = (short) Math.min(minCandidate, maxCandidate); + final short maxValue = (short) Math.max(minCandidate, maxCandidate); + + final long firstKeyCandidate = rnd.nextInt(size); + final long lastKeyCandidate = rnd.nextInt(size); + + final long firstKey = Math.min(firstKeyCandidate, lastKeyCandidate); + final long lastKey = Math.max(firstKeyCandidate, lastKeyCandidate); + + // Test all combinations of inverted/inclusive/exclusive min/max. 
+ minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, false, firstKey, lastKey, minValue, false, maxValue, false); + + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, true, maxValue, false); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, true); + minMaxTestRunner(data, true, firstKey, lastKey, minValue, false, maxValue, false); + } + } + } + + private void minMaxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final short minValue, + final boolean minInclusive, + final short maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionShort region = makeColumnRegionShort(dataToUse); + + try (final RowSet result = ShortRegionBinarySearchKernel.binarySearchMinMax( + region, firstKey, lastKey, sortColumn, minValue, maxValue, minInclusive, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final short value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(ShortComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(ShortComparisons.compare(value, minValue) > 0); + } + if (maxInclusive) { + Assert.assertTrue(ShortComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(ShortComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void minTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final short minValue, + final boolean minInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionShort region = makeColumnRegionShort(dataToUse); + + try (final RowSet result = ShortRegionBinarySearchKernel.binarySearchMin( + region, firstKey, lastKey, sortColumn, minValue, minInclusive)) { + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final short value = dataToUse.get((int) rowKey); + if (minInclusive) { + Assert.assertTrue(ShortComparisons.compare(value, minValue) >= 0); + } else { + Assert.assertTrue(ShortComparisons.compare(value, minValue) > 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + + private void maxTestRunner( + List data, + final boolean inverted, + final long firstKey, + final long lastKey, + final short maxValue, + final boolean maxInclusive) { + + final List dataToUse; + final SortColumn sortColumn; + if (inverted) { + dataToUse = new ArrayList<>(data); + Collections.reverse(dataToUse); + sortColumn = SortColumn.desc(ColumnName.of("test")); + } else { + dataToUse = data; + sortColumn = SortColumn.asc(ColumnName.of("test")); + } + + final ColumnRegionShort region = makeColumnRegionShort(dataToUse); + + try (final RowSet result = ShortRegionBinarySearchKernel.binarySearchMax( + region, firstKey, lastKey, sortColumn, maxValue, maxInclusive)) { + + // Test from 0 to firstKey - 1 to make sure no false positives are found below the first key. + if (firstKey > 0) { + try (final RowSet excludedLow = RowSetFactory.fromRange(0, firstKey - 1); + final RowSet intersection = result.intersect(excludedLow)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + + // Go through every value in the result and ensure it is within the min/max bounds. 
+ result.forAllRowKeys(rowKey -> { + // Must be within the first/last key bounds + Assert.assertTrue(rowKey >= firstKey && rowKey <= lastKey); + + // The value at the row key must be within the min/max bounds. + final short value = dataToUse.get((int) rowKey); + if (maxInclusive) { + Assert.assertTrue(ShortComparisons.compare(value, maxValue) <= 0); + } else { + Assert.assertTrue(ShortComparisons.compare(value, maxValue) < 0); + } + }); + + // Test from lastKey + 1 to make sure no false positives are found above the lastKey. + try (final RowSet excludedHigh = RowSetFactory.fromRange(lastKey + 1, Long.MAX_VALUE); + final RowSet intersection = result.intersect(excludedHigh)) { + Assert.assertTrue(intersection.isEmpty()); + } + } + } + private static final int PAGE_SIZE = 1 << 16; private static ColumnRegionShort makeColumnRegionShort(@NotNull final List values) { diff --git a/engine/test-utils/src/main/java/io/deephaven/engine/testutil/filters/RowSetCapturingFilter.java b/engine/test-utils/src/main/java/io/deephaven/engine/testutil/filters/RowSetCapturingFilter.java index c7fdaae87e4..72b806b3233 100644 --- a/engine/test-utils/src/main/java/io/deephaven/engine/testutil/filters/RowSetCapturingFilter.java +++ b/engine/test-utils/src/main/java/io/deephaven/engine/testutil/filters/RowSetCapturingFilter.java @@ -10,6 +10,7 @@ import io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.impl.QueryCompilerRequestProcessor; import io.deephaven.engine.table.impl.select.WhereFilter; +import io.deephaven.engine.table.impl.select.WhereFilterDelegating; import io.deephaven.engine.table.impl.select.WhereFilterImpl; import io.deephaven.util.SafeCloseable; import org.jetbrains.annotations.NotNull; @@ -27,7 +28,7 @@ *

* Once used, or between-uses, it is expected that the {@link #reset()} method is called to clear the captured RowSets. */ -public class RowSetCapturingFilter extends WhereFilterImpl implements SafeCloseable { +public class RowSetCapturingFilter extends WhereFilterImpl implements WhereFilterDelegating, SafeCloseable { final List rowSets; final WhereFilter innerFilter; @@ -59,6 +60,16 @@ public RowSetCapturingFilter(final Filter filter) { this.innerFilter = filter; } + @Override + public WhereFilter getWrappedFilter() { + return innerFilter; + } + + @Override + public WhereFilter maybeUnwrapFilter() { + return WhereFilterDelegating.maybeUnwrapFilter(innerFilter); + } + @Override public List getColumns() { return innerFilter == null ? Collections.emptyList() : innerFilter.getColumns(); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionByte.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionByte.java index 35c6a5a841f..bc382a2e4c5 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionByte.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionByte.java @@ -13,13 +13,32 @@ import io.deephaven.engine.rowset.RowSequenceFactory; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.ByteRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import 
io.deephaven.engine.table.impl.select.RangeFilter; +import io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionByte; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.ByteRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_BYTE; +import static io.deephaven.util.QueryConstants.NULL_BYTE; + /** * {@link ColumnRegionByte} implementation for regions that support fetching primitive bytes from * {@link ColumnChunkPageStore column chunk page stores}. @@ -27,15 +46,22 @@ public final class ParquetColumnRegionByte extends ParquetColumnRegionBase implements ColumnRegionByte, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionByte(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes public byte[] getBytes( final long firstRowKey, - @NotNull final byte[] destination, + final byte[] destination, final int destinationOffset, final int length ) { @@ -56,4 +82,136 @@ public byte getByte(final long rowKey) { throw new TableDataException("Error retrieving byte at row key " + rowKey + " 
from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return 
input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = ByteRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof ByteRangeFilter) { + final ByteRangeFilter rangeFilter = (ByteRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_BYTE && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. 
+ matches = ByteRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_BYTE && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. + matches = ByteRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. + matches = ByteRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionChar.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionChar.java index c79d8531863..d9b71100ea6 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionChar.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionChar.java @@ -4,13 +4,32 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import 
io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.CharRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import io.deephaven.engine.table.impl.select.RangeFilter; +import io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionChar; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.CharRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_CHAR; +import static io.deephaven.util.QueryConstants.NULL_CHAR; + /** * {@link ColumnRegionChar} implementation for regions that support fetching primitive chars from * {@link ColumnChunkPageStore column chunk page stores}. 
@@ -18,11 +37,18 @@ public final class ParquetColumnRegionChar extends ParquetColumnRegionBase implements ColumnRegionChar, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionChar(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes // endregion getBytes @@ -35,4 +61,136 @@ public char getChar(final long rowKey) { throw new TableDataException("Error retrieving char at row key " + rowKey + " from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? 
null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column.
+ final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = CharRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof CharRangeFilter) { + final CharRangeFilter rangeFilter = (CharRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_CHAR && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. + matches = CharRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_CHAR && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. + matches = CharRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. 
+ matches = CharRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionDouble.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionDouble.java index 51cb425ba03..14f628ce0d7 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionDouble.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionDouble.java @@ -8,13 +8,32 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.DoubleRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import io.deephaven.engine.table.impl.select.RangeFilter; +import io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionDouble; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import 
io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.DoubleRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_DOUBLE; +import static io.deephaven.util.QueryConstants.NULL_DOUBLE; + /** * {@link ColumnRegionDouble} implementation for regions that support fetching primitive doubles from * {@link ColumnChunkPageStore column chunk page stores}. @@ -22,11 +41,18 @@ public final class ParquetColumnRegionDouble extends ParquetColumnRegionBase implements ColumnRegionDouble, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionDouble(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes // endregion getBytes @@ -39,4 +65,136 @@ public double getDouble(final long rowKey) { throw new TableDataException("Error retrieving double at row key " + rowKey + " from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) 
{ + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ?
null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = DoubleRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof DoubleRangeFilter) { + final DoubleRangeFilter rangeFilter = (DoubleRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_DOUBLE && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. + matches = DoubleRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_DOUBLE && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. 
+ matches = DoubleRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. + matches = DoubleRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionFloat.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionFloat.java index 3ba25d4ea73..b823f2b1fae 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionFloat.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionFloat.java @@ -8,13 +8,32 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.FloatRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import io.deephaven.engine.table.impl.select.RangeFilter; +import 
io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionFloat; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.FloatRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_FLOAT; +import static io.deephaven.util.QueryConstants.NULL_FLOAT; + /** * {@link ColumnRegionFloat} implementation for regions that support fetching primitive floats from * {@link ColumnChunkPageStore column chunk page stores}. @@ -22,11 +41,18 @@ public final class ParquetColumnRegionFloat extends ParquetColumnRegionBase implements ColumnRegionFloat, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionFloat(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes // endregion getBytes @@ -39,4 +65,136 @@ public float getFloat(final long rowKey) { throw new TableDataException("Error retrieving float at row key " + rowKey + " from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final 
RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ?
null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = FloatRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof FloatRangeFilter) { + final FloatRangeFilter rangeFilter = (FloatRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_FLOAT && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. + matches = FloatRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_FLOAT && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. 
+ matches = FloatRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. + matches = FloatRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionInt.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionInt.java index af7e33bdced..845719b6c8a 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionInt.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionInt.java @@ -8,13 +8,32 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.IntRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import io.deephaven.engine.table.impl.select.RangeFilter; +import 
io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionInt; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.IntRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_INT; +import static io.deephaven.util.QueryConstants.NULL_INT; + /** * {@link ColumnRegionInt} implementation for regions that support fetching primitive ints from * {@link ColumnChunkPageStore column chunk page stores}. @@ -22,11 +41,18 @@ public final class ParquetColumnRegionInt extends ParquetColumnRegionBase implements ColumnRegionInt, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionInt(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes // endregion getBytes @@ -39,4 +65,136 @@ public int getInt(final long rowKey) { throw new TableDataException("Error retrieving int at row key " + rowKey + " from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final RegionedPushdownAction action, + final 
WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ?
null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = IntRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof IntRangeFilter) { + final IntRangeFilter rangeFilter = (IntRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_INT && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. + matches = IntRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_INT && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. 
+ matches = IntRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. + matches = IntRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionLong.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionLong.java index 309e1685904..5728f977985 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionLong.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionLong.java @@ -8,13 +8,32 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.LongRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import io.deephaven.engine.table.impl.select.RangeFilter; +import 
io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionLong; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.LongRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_LONG; +import static io.deephaven.util.QueryConstants.NULL_LONG; + /** * {@link ColumnRegionLong} implementation for regions that support fetching primitive longs from * {@link ColumnChunkPageStore column chunk page stores}. @@ -22,11 +41,18 @@ public final class ParquetColumnRegionLong extends ParquetColumnRegionBase implements ColumnRegionLong, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionLong(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes // endregion getBytes @@ -39,4 +65,136 @@ public long getLong(final long rowKey) { throw new TableDataException("Error retrieving long at row key " + rowKey + " from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final RegionedPushdownAction 
action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? 
null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = LongRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof LongRangeFilter) { + final LongRangeFilter rangeFilter = (LongRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_LONG && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. + matches = LongRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_LONG && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. 
+ matches = LongRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. + matches = LongRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionObject.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionObject.java index 7b365c5f01a..190692f2ff3 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionObject.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionObject.java @@ -4,10 +4,16 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; -import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionLong; -import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionObject; -import io.deephaven.engine.table.impl.sources.regioned.RegionVisitResult; +import io.deephaven.engine.table.impl.locations.TableLocation; +import 
io.deephaven.engine.table.impl.select.*; +import io.deephaven.engine.table.impl.sources.regioned.*; +import io.deephaven.engine.table.impl.sources.regioned.kernel.ObjectRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.table.impl.chunkattributes.DictionaryKeys; @@ -15,6 +21,7 @@ import io.deephaven.engine.rowset.RowSet; import org.jetbrains.annotations.NotNull; +import java.util.List; import java.util.function.Supplier; /** @@ -24,6 +31,14 @@ public final class ParquetColumnRegionObject extends ParquetColumnRegionBase implements ColumnRegionObject, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + private volatile Supplier> dictionaryKeysRegionSupplier; private volatile Supplier> dictionaryValuesRegionSupplier; @@ -82,4 +97,137 @@ public ColumnRegionObject getDictionaryValuesRegion() { } return dictionaryValuesRegion; } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return 
PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + // Can't push down case-insensitive match filters to binary search + if (ctx.isMatchFilter() && ctx.filter() instanceof MatchFilter && + ((MatchFilter) ctx.filter()).getMatchOptions().caseInsensitive()) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? 
null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // Can't push down case-insensitive match filters to binary search + if (ctx.isMatchFilter() && ctx.filter() instanceof MatchFilter && + ((MatchFilter) ctx.filter()).getMatchOptions().caseInsensitive()) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = ObjectRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof SingleSidedComparableRangeFilter) { + final SingleSidedComparableRangeFilter rangeFilter = + (SingleSidedComparableRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.isGreaterThan()) { + // Only need to find the lower bound, as the upper bound includes all values. 
+ matches = ObjectRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getPivot(), + rangeFilter.isLowerInclusive()); + } else { + // Only need to find the upper bound, as the lower bound includes all values. + matches = ObjectRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getPivot(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionShort.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionShort.java index 2434682e0aa..18c6e414659 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionShort.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/region/ParquetColumnRegionShort.java @@ -8,13 +8,32 @@ package io.deephaven.parquet.table.region; import io.deephaven.engine.table.impl.locations.ColumnLocation; +import io.deephaven.api.SortColumn; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.rowset.RowSetFactory; +import io.deephaven.engine.table.impl.PushdownFilterContext; +import io.deephaven.engine.table.impl.PushdownResult; +import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.engine.table.impl.locations.TableLocation; +import io.deephaven.engine.table.impl.select.ShortRangeFilter; +import io.deephaven.engine.table.impl.select.MatchFilter; +import io.deephaven.engine.table.impl.select.RangeFilter; +import 
io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.engine.table.impl.sources.regioned.ColumnRegionShort; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownAction; +import io.deephaven.engine.table.impl.sources.regioned.RegionedPushdownFilterContext; +import io.deephaven.engine.table.impl.sources.regioned.kernel.ShortRegionBinarySearchKernel; import io.deephaven.parquet.table.pagestore.ColumnChunkPageStore; import io.deephaven.chunk.attributes.Any; import io.deephaven.engine.page.ChunkPage; import org.jetbrains.annotations.NotNull; +import java.util.List; + +import static io.deephaven.util.QueryConstants.MAX_SHORT; +import static io.deephaven.util.QueryConstants.NULL_SHORT; + /** * {@link ColumnRegionShort} implementation for regions that support fetching primitive shorts from * {@link ColumnChunkPageStore column chunk page stores}. @@ -22,11 +41,18 @@ public final class ParquetColumnRegionShort extends ParquetColumnRegionBase implements ColumnRegionShort, ParquetColumnRegion { + private static final RegionedPushdownAction.Region SORTED_REGION_ACTION = + new RegionedPushdownAction.Region( + () -> QueryTable.DISABLE_WHERE_PUSHDOWN_SORTED_COLUMN_LOCATION, + PushdownResult.REGION_SORTED_DATA_COST, + (ctx) -> ctx.isMatchFilter() || ctx.isRangeFilter(), + (tl, cr) -> true); + private static final List SUPPORTED_ACTIONS = List.of(SORTED_REGION_ACTION); + public ParquetColumnRegionShort(@NotNull final ColumnChunkPageStore columnChunkPageStore, @NotNull final ColumnLocation columnLocation) { super(columnChunkPageStore.mask(), columnChunkPageStore, columnLocation); } - // region getBytes // endregion getBytes @@ -39,4 +65,136 @@ public short getShort(final long rowKey) { throw new TableDataException("Error retrieving short at row key " + rowKey + " from a parquet table", e); } } + + @Override + public List supportedActions() { + return SUPPORTED_ACTIONS; + } + + @Override + public long estimatePushdownAction( + final 
RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.EstimateContext estimateContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? null + : tableLocation.getSortedColumns().get(0); + + if (firstSortedColumn != null) { + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (firstSortedColumn.column().name().equals(renamedCol)) { + return action.filterCost(); + } + } + } + return PushdownResult.UNSUPPORTED_ACTION_COST; + } + + @Override + public PushdownResult performPushdownAction( + final RegionedPushdownAction action, + final WhereFilter filter, + final RowSet selection, + final PushdownResult input, + final boolean usePrev, + final PushdownFilterContext filterContext, + final RegionedPushdownAction.ActionContext actionContext) { + if (action.equals(SORTED_REGION_ACTION)) { + final RegionedPushdownFilterContext ctx = (RegionedPushdownFilterContext) filterContext; + + final TableLocation tableLocation = getColumnLocation().map(ColumnLocation::getTableLocation).orElse(null); + if (tableLocation == null || (!ctx.isMatchFilter() && !ctx.isRangeFilter())) { + return input.copy(); + } + // Only range and match filters can benefit from sorted column data. + final SortColumn firstSortedColumn = tableLocation.getSortedColumns().isEmpty() + ? 
null + : tableLocation.getSortedColumns().get(0); + if (firstSortedColumn == null) { + return input.copy(); + } + + // Handle renames + final String col = filter.getColumns().get(0); + final String renamedCol = ctx.filterColumnToManagerColumnName().getOrDefault(col, col); + if (!firstSortedColumn.column().name().equals(renamedCol)) { + return input.copy(); + } + + // We will use the effective filter from the context, which may bypass row tracking but provides the + // raw filter that we need to apply to the sorted column. + final WhereFilter effectiveFilter = ctx.filter(); + + if (ctx.isMatchFilter()) { + final MatchFilter matchFilter = (MatchFilter) effectiveFilter; + try (final RowSet matches = ShortRegionBinarySearchKernel.binarySearchMatch( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + matchFilter.getValues())) { + // Handle normal / inverted match filters: + return PushdownResult.of(selection, matchFilter.getMatchOptions().inverted() + ? selection.minus(matches) + : matches.intersect(selection), RowSetFactory.empty()); + } + } + + if (ctx.isRangeFilter() && effectiveFilter instanceof RangeFilter + && ((RangeFilter) effectiveFilter).getRealFilter() instanceof ShortRangeFilter) { + final ShortRangeFilter rangeFilter = (ShortRangeFilter) ((RangeFilter) effectiveFilter).getRealFilter(); + final RowSet matches; + if (rangeFilter.getLower() == NULL_SHORT && rangeFilter.isLowerInclusive()) { + // Only need to find the upper bound, as the lower bound includes all values. + matches = ShortRegionBinarySearchKernel.binarySearchMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getUpper(), + rangeFilter.isUpperInclusive()); + } else if (rangeFilter.getUpper() == MAX_SHORT && rangeFilter.isUpperInclusive()) { + // Only need to find the lower bound, as the upper bound includes all values. 
+ matches = ShortRegionBinarySearchKernel.binarySearchMin( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.isLowerInclusive()); + } else { + // Find the lower and upper bounds. + matches = ShortRegionBinarySearchKernel.binarySearchMinMax( + this, + selection.firstRowKey(), + selection.lastRowKey(), + firstSortedColumn, + rangeFilter.getLower(), + rangeFilter.getUpper(), + rangeFilter.isLowerInclusive(), + rangeFilter.isUpperInclusive()); + } + try (final RowSet ignored = matches) { + return PushdownResult.of( + selection, + selection.subSetByKeyRange(matches.firstRowKey(), matches.lastRowKey()), + RowSetFactory.empty()); + } + } + return input.copy(); + } + return input.copy(); + } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableFilterTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableFilterTest.java index 27a4628d222..02fe32b30bc 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableFilterTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableFilterTest.java @@ -2104,47 +2104,61 @@ public void testLocationDataIndexWithFilterBarriers() { final List allFilters = List.of(filterA, filterB); Table result; + Filter f; // Test with no barrier, expect A then B - result = diskTable.where(Filter.and(filterA, filterB)); - assertEquals(97, filterA.numRowsProcessed()); // only indexA rows + f = Filter.and(filterA, filterB); + result = diskTable.where(f).coalesce(); + // A has an index, stored as sorted data. Applying to the index results in bin search, bypassing filter + // row set capture. 
+ assertEquals(0, filterA.numRowsProcessed()); assertEquals(51550, filterB.numRowsProcessed()); assertEquals(23435, result.size()); + assertTableEquals(memTable.where(f).coalesce(), result); allFilters.forEach(RowSetCapturingFilter::reset); // Test with no barrier, expect A then B despite user ordering - result = diskTable.where(Filter.and(filterB, filterA)); - assertEquals(97, filterA.numRowsProcessed()); // only indexA rows + f = Filter.and(filterB, filterA); + result = diskTable.where(f).coalesce(); + assertEquals(0, filterA.numRowsProcessed()); assertEquals(51550, filterB.numRowsProcessed()); assertEquals(23435, result.size()); + assertTableEquals(memTable.where(f).coalesce(), result); allFilters.forEach(RowSetCapturingFilter::reset); // Barrier to force B then A - result = diskTable.where(Filter.and(filterB.withDeclaredBarriers("b1"), filterA.withRespectedBarriers("b1"))); - assertEquals(97, filterA.numRowsProcessed()); // only indexA rows + f = Filter.and(filterB.withDeclaredBarriers("b1"), filterA.withRespectedBarriers("b1")); + result = diskTable.where(f).coalesce(); + assertEquals(0, filterA.numRowsProcessed()); assertEquals(100_000, filterB.numRowsProcessed()); assertEquals(23435, result.size()); + assertTableEquals(memTable.where(f).coalesce(), result); allFilters.forEach(RowSetCapturingFilter::reset); // Barrier to force B then A - result = diskTable.where(Filter.and(filterB.withSerial(), filterA)); - assertEquals(97, filterA.numRowsProcessed()); // only indexA rows + f = Filter.and(filterB.withSerial(), filterA); + result = diskTable.where(f).coalesce(); + assertEquals(0, filterA.numRowsProcessed()); assertEquals(100_000, filterB.numRowsProcessed()); assertEquals(23435, result.size()); + assertTableEquals(memTable.where(f).coalesce(), result); allFilters.forEach(RowSetCapturingFilter::reset); // Inverted - Barrier to force B then A - result = diskTable.where(Filter.and( + f = Filter.and( WhereFilterInvertedImpl.of(filterB.withDeclaredBarriers("b1")), - 
WhereFilterInvertedImpl.of(filterA.withRespectedBarriers("b1")))); - assertEquals(97, filterA.numRowsProcessed()); // only indexA rows + WhereFilterInvertedImpl.of(filterA.withRespectedBarriers("b1"))); + result = diskTable.where(f).coalesce(); + // filterA not recognized as a range filter (because of the inversion), so applied to index table rows. + assertEquals(97, filterA.numRowsProcessed()); assertEquals(100_000, filterB.numRowsProcessed()); assertEquals(26430, result.size()); + assertTableEquals(memTable.where(f).coalesce(), result); allFilters.forEach(RowSetCapturingFilter::reset); } @@ -2192,12 +2206,10 @@ public void testPartitioningTableColumnRegions() { Table result; - result = diskTable.where(capturingFilterSym); + result = diskTable.where(capturingFilterSym).coalesce(); - // 583 is explained as follows. There are 6 * 97 = 582 regions where Sym is not-null which are tested. - // There are 97 null regions for Sym which are all covered by a single test (to determine filter null - // behavior). - assertEquals(583, capturingFilterSym.numRowsProcessed()); + // This filter is executed as a chunk filter over the constant regions, no rows are logged. + assertEquals(0, capturingFilterSym.numRowsProcessed()); assertEquals(28572, result.size()); // Use the unwrapped filter to test other optimization paths (i.e. chunk filtering) and assert equality. @@ -2208,9 +2220,10 @@ public void testPartitioningTableColumnRegions() { ////////////////////////////////////////////////////// - result = diskTable.where(capturingFilterSymConditional); + // This filter is executed as a chunk filter over the constant regions, no rows are logged. + result = diskTable.where(capturingFilterSymConditional).coalesce(); - assertEquals(583, capturingFilterSymConditional.numRowsProcessed()); + assertEquals(0, capturingFilterSymConditional.numRowsProcessed()); assertEquals(28572, result.size()); // Use the unwrapped filter to test other optimization paths (i.e. 
chunk filtering) and assert equality. @@ -2223,9 +2236,8 @@ public void testPartitioningTableColumnRegions() { result = diskTable.where(capturingFilterA).coalesce(); - // 673 is explained as follows. There are 7 * 96 = 672 regions where A is not null. There are 6 null - // regions for A, which are covered by a single test for filter null behavior. - assertEquals(673, capturingFilterA.numRowsProcessed()); + // This filter is executed as a chunk filter over the constant regions, no rows are logged. + assertEquals(0, capturingFilterA.numRowsProcessed()); assertEquals(51550, result.size()); // Use the unwrapped filter to test other optimization paths (i.e. chunk filtering) and assert equality. @@ -2236,9 +2248,10 @@ public void testPartitioningTableColumnRegions() { ////////////////////////////////////////////////////// + // This filter is executed as a chunk filter over the constant regions, no rows are logged. result = diskTable.where(capturingFilterAConditional).coalesce(); - assertEquals(673, capturingFilterAConditional.numRowsProcessed()); + assertEquals(0, capturingFilterAConditional.numRowsProcessed()); assertEquals(51550, result.size()); // Use the unwrapped filter to test other optimization paths (i.e. chunk filtering) and assert equality. @@ -2249,7 +2262,7 @@ public void testPartitioningTableColumnRegions() { ////////////////////////////////////////////////////// - result = diskTable.where(capturingFilterB); + result = diskTable.where(capturingFilterB).coalesce(); // All rows to be tested. assertEquals(100000, capturingFilterB.numRowsProcessed()); @@ -2263,12 +2276,12 @@ public void testPartitioningTableColumnRegions() { ////////////////////////////////////////////////////// + // This filter is executed as a chunk filter over the constant regions, no rows are logged. 
result = diskTable.where(Filter.and(capturingFilterSym, capturingFilterA)).coalesce(); - // All regions tested for Sym match - assertEquals(583, capturingFilterSym.numRowsProcessed()); + assertEquals(0, capturingFilterSym.numRowsProcessed()); // A subset of regions tested for A match - assertEquals(193, capturingFilterA.numRowsProcessed()); + assertEquals(0, capturingFilterA.numRowsProcessed()); assertEquals(14729, result.size()); // Use the unwrapped filter to test other optimization paths (i.e. chunk filtering) and assert equality. diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index f1dc2542736..0acf9c01063 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -5,8 +5,10 @@ import io.deephaven.UncheckedDeephavenException; import io.deephaven.api.ColumnName; +import io.deephaven.api.RawString; import io.deephaven.api.Selectable; import io.deephaven.api.SortColumn; +import io.deephaven.api.filter.Filter; import io.deephaven.base.FileUtils; import io.deephaven.base.verify.Assert; import io.deephaven.engine.context.ExecutionContext; @@ -37,9 +39,7 @@ import io.deephaven.engine.table.impl.indexer.DataIndexer; import io.deephaven.engine.table.impl.locations.ColumnLocation; import io.deephaven.engine.table.impl.locations.impl.StandaloneTableKey; -import io.deephaven.engine.table.impl.select.FormulaEvaluationException; -import io.deephaven.engine.table.impl.select.FunctionalColumn; -import io.deephaven.engine.table.impl.select.SelectColumn; +import io.deephaven.engine.table.impl.select.*; import io.deephaven.engine.table.impl.sources.ReinterpretUtils; import io.deephaven.engine.table.impl.util.ColumnHolder; import io.deephaven.engine.table.iterators.*; 
@@ -5033,6 +5033,163 @@ public void testReadEnumLogicalTypeAsString() throws IOException {
         checkSingleTable(newTable(stringCol("status", "RED", "GREEN", "BLUE")), dest);
     }
 
+    /**
+     * Convenience overload of {@link #testSortedFilteringInternal(Table, String, Filter)} that wraps the filter
+     * expression in a {@link RawString}.
+     *
+     * @param source the table to sort, write to parquet, and filter
+     * @param columnName the column to sort by
+     * @param filter the filter expression, passed to {@link RawString#of(String)}
+     */
+    private void testSortedFilteringInternal(final Table source, final String columnName, final String filter) {
+        testSortedFilteringInternal(source, columnName, RawString.of(filter));
+    }
+
+    /**
+     * Sort {@code source} by {@code columnName}, ascending and then descending, and check for each ordering that
+     * {@code filter} selects the same rows from the parquet-backed table as from the in-memory table.
+     *
+     * @param source the table to sort, write to parquet, and filter
+     * @param columnName the column to sort by
+     * @param filter the filter to apply
+     */
+    private void testSortedFilteringInternal(
+            final Table source,
+            final String columnName,
+            final Filter filter) {
+        assertSortedFilterRoundTrip(source.sort(columnName), "ParquetTest_sortedColumnFilteringAsc.parquet", filter);
+        // Also verify that the descending sort has the same results.
+        assertSortedFilterRoundTrip(source.sortDescending(columnName),
+                "ParquetTest_sortedColumnFilteringDesc.parquet", filter);
+    }
+
+    /**
+     * Write {@code sorted} to {@code fileName} under {@code rootFile} and check that {@code filter} selects the same
+     * rows from the table returned by {@code checkSingleTable} as from {@code sorted}, both when applied directly and
+     * when applied after an initial {@code ii % 2 == 0} filter.
+     *
+     * @param sorted the sorted table to write; also the source of the expected results
+     * @param fileName the name of the parquet file to write
+     * @param filter the filter to apply
+     */
+    private void assertSortedFilterRoundTrip(final Table sorted, final String fileName, final Filter filter) {
+        final File dest = new File(rootFile, fileName);
+        writeTable(sorted, dest.getPath());
+        final Table fromDisk = checkSingleTable(sorted, dest);
+        Table result = fromDisk.where(filter);
+        assertTableEquals(sorted.where(filter), result);
+
+        // Perform the filter on an already filtered table.
+        result = fromDisk.where("ii % 2 == 0").where(filter);
+        assertTableEquals(sorted.where("ii % 2 == 0").where(filter), result);
+    }
+
+    /**
+     * Run {@code in}, {@code >}, {@code <=} and {@code >=} filters against tables sorted by a column of each tested
+     * type (byte, char, short, int, long, float, double, and String), with nulls in every column and NaN and infinite
+     * values in the floating point columns.
+     */
+    @Test
+    public void testSortedColumnFiltering() {
+        // Need to disable metadata to trigger the estimation action on the regions and force coverage.
+        final boolean restore = QueryTable.DISABLE_WHERE_PUSHDOWN_PARQUET_ROW_GROUP_METADATA;
+        try (final SafeCloseable ignored =
+                () -> QueryTable.DISABLE_WHERE_PUSHDOWN_PARQUET_ROW_GROUP_METADATA = restore) {
+            QueryTable.DISABLE_WHERE_PUSHDOWN_PARQUET_ROW_GROUP_METADATA = true;
+            final Table testTable = TableTools.emptyTable(10_000)
+                    .update(
+                            "byteCol = i % 97 == 0 ? null : (byte)(i % 97)",
+                            "charCol = i % 997 == 0 ? null : (char)(i % 997)",
+                            "shortCol = i % 997 == 0 ? null : (short)(i % 997)",
+                            "intCol = i % 997 == 0 ? null : i % 997",
+                            "longCol = i % 997 == 0 ? null : ii % 997",
+                            "floatCol = (i % 997 == 0) ? null : (i % 997 == 996) ? Float.NaN : (i % 997 == 995) ? Float.POSITIVE_INFINITY : (i % 997 == 994) ? Float.NEGATIVE_INFINITY : (float)(i % 997)",
+                            "doubleCol = (i % 997 == 0) ? null : (i % 997 == 996) ? Double.NaN : (i % 997 == 995) ? Double.POSITIVE_INFINITY : (i % 997 == 994) ? Double.NEGATIVE_INFINITY : (double)(i % 997)",
+                            "stringCol = i % 997 == 0 ? null : `Str` + (i % 997)");
+
+            testSortedFilteringInternal(testTable, "byteCol", "byteCol in 30, 50, 70");
+            testSortedFilteringInternal(testTable, "byteCol", "byteCol > 30");
+            testSortedFilteringInternal(testTable, "byteCol", "byteCol <= 50");
+
+            testSortedFilteringInternal(testTable, "charCol", "charCol in 'a', 'b', 'c'");
+            testSortedFilteringInternal(testTable, "charCol", "charCol > 'a'");
+            testSortedFilteringInternal(testTable, "charCol", "charCol <= 'b'");
+
+            testSortedFilteringInternal(testTable, "shortCol", "shortCol in 300, 500, 700");
+            testSortedFilteringInternal(testTable, "shortCol", "shortCol > 300");
+            testSortedFilteringInternal(testTable, "shortCol", "shortCol <= 500");
+
+            testSortedFilteringInternal(testTable, "intCol", "intCol in 300, 500, 700");
+            testSortedFilteringInternal(testTable, "intCol", "intCol > 300");
+            testSortedFilteringInternal(testTable, "intCol", "intCol <= 500");
+
+            testSortedFilteringInternal(testTable, "longCol", "longCol in 300, 500, 700");
+            testSortedFilteringInternal(testTable, "longCol", "longCol > 300");
+            testSortedFilteringInternal(testTable, "longCol", "longCol <= 500");
+
+            testSortedFilteringInternal(testTable, "floatCol", "floatCol in 300.0, 500.0, 700.0");
+            testSortedFilteringInternal(testTable, "floatCol", "floatCol in NaN");
+            testSortedFilteringInternal(testTable, "floatCol", "floatCol > 300.0");
+            testSortedFilteringInternal(testTable, "floatCol", "floatCol <= 500.0");
+            testSortedFilteringInternal(testTable, "floatCol", "floatCol > Float.POSITIVE_INFINITY");
+            testSortedFilteringInternal(testTable, "floatCol", "floatCol >= NaN");
+
+            testSortedFilteringInternal(testTable, "doubleCol", "doubleCol in 300.0, 500.0, 700.0");
+            testSortedFilteringInternal(testTable, "doubleCol", "doubleCol in NaN");
+            testSortedFilteringInternal(testTable, "doubleCol", "doubleCol > 300.0");
+            testSortedFilteringInternal(testTable, "doubleCol", "doubleCol <= 500.0");
+            testSortedFilteringInternal(testTable, "doubleCol", "doubleCol > Double.POSITIVE_INFINITY");
+            testSortedFilteringInternal(testTable, "doubleCol", "doubleCol >= NaN");
+
+            testSortedFilteringInternal(testTable, "stringCol", "stringCol in `Str300`, `Str500`, `Str700`");
+            testSortedFilteringInternal(testTable, "stringCol", "stringCol > `Str300`");
+            testSortedFilteringInternal(testTable, "stringCol", "stringCol <= `Str500`");
+        }
+    }
+
+    /**
+     * Run {@code in}, {@code >} and {@code <} filters on both the sort column {@code B} and the other column
+     * {@code A} of a table that was written to parquet sorted descending by {@code B}.
+     */
+    @Test
+    public void testSortedColumnDescFiltering() {
+        final Table testDesc = TableTools.emptyTable(100_000)
+                .update("A = i % 97 == 0 ? null : i % 97", "B = i % 997 == 0 ? null : i % 997")
+                .sortDescending("B");
+
+        final File dest = new File(rootFile, "ParquetTest_sortedColumnFiltering.parquet");
+        writeTable(testDesc, dest.getPath());
+
+        final Table fromDisk = checkSingleTable(testDesc, dest);
+
+        Table result;
+        Filter f;
+
+        f = RawString.of("A in 50, 30, 20");
+        result = fromDisk.where(f);
+        assertTableEquals(testDesc.where(f), result);
+
+        f = RawString.of("B in 500, 300, 200");
+        result = fromDisk.where(f);
+        assertTableEquals(testDesc.where(f), result);
+
+        f = RawString.of("A > 30");
+        result = fromDisk.where(f);
+        assertTableEquals(testDesc.where(f), result);
+
+        f = RawString.of("B > 300");
+        result = fromDisk.where(f);
+        assertTableEquals(testDesc.where(f), result);
+
+        f = RawString.of("A < 30");
+        result = fromDisk.where(f);
+        assertTableEquals(testDesc.where(f), result);
+
+        f = RawString.of("B < 300");
+        result = fromDisk.where(f);
+        assertTableEquals(testDesc.where(f), result);
+    }
+
     private void assertTableStatistics(Table inputTable, File dest) {
         // Verify that the columns have the correct statistics.
final ParquetMetadata metadata = diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateRegionAndRegionedSourceTests.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateRegionAndRegionedSourceTests.java index 7e66264248e..e204aafa01b 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateRegionAndRegionedSourceTests.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateRegionAndRegionedSourceTests.java @@ -5,8 +5,7 @@ import java.io.IOException; -import static io.deephaven.replication.ReplicatePrimitiveCode.charToAllButBoolean; -import static io.deephaven.replication.ReplicatePrimitiveCode.charToAllButBooleanAndByte; +import static io.deephaven.replication.ReplicatePrimitiveCode.*; /** * Code generation for tests of {@link RegionedColumnSource} implementations as well as well as the primary region @@ -19,7 +18,9 @@ public static void main(String... args) throws IOException { "engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/TestRegionedColumnSourceChar.java"); charToAllButBooleanAndByte("replicateRegionAndRegionedSourceTests", "engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/TstColumnRegionChar.java"); - charToAllButBoolean("replicateRegionAndRegionedSourceTests", + charToAllButBooleanAndFloats("replicateRegionAndRegionedSourceTests", "engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/CharRegionBinarySearchKernelTest.java"); + floatToAllFloatingPoints("replicateRegionAndRegionedSourceTests", + "engine/table/src/test/java/io/deephaven/engine/table/impl/sources/regioned/kernel/FloatRegionBinarySearchKernelTest.java"); } }