diff --git a/src/indexing.jl b/src/indexing.jl index 5d1b22a..8869038 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -184,30 +184,67 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval) # Or repeated intervals, which only work if the axis is a range since otherwise # there will be a non-constant number of indices in each repetition. -# Two small tricks are used here: -# * Compute the resulting interval axis with unsafe indexing without any offset -# - Since it's a range, we can do this, and it makes the resulting axis useful -# * Snap the offsets to the nearest datapoint to avoid fencepost problems -# Adds a dimension to the result; rows represent the interval and columns are offsets. +# +# There are a number of challenges here: +# * This operation adds a dimension to the result; rows represent the interval +# (or subset) and columns are offsets (or repetition). A RepeatedRangeMatrix +# represents the resulting matrix of indices very nicely. +# * We also want the returned matrix to keep track of its axes; the axis +# subset (ax_sub) is the relative location of the interval with respect to +# each offset, and the repetitions (ax_rep) is the array of offsets. +# * We are interested in the resulting *addition* of the interval against the +# offsets. Either the offsets or the interval may independently be out of +# bounds prior to this addition. Even worse: the interval may have different +# units than the axis (e.g., `(Day(-1)..Day(1)) + dates` for a three-day +# span around dates of interest over a Date axis). +# * It is possible (and likely!) that neither the interval endpoints nor the +# offsets fall exactly upon an axis value. Or even worse: the some offsets +# when added to the interval could span more elements than others (the +# fencepost problem). As such, we need to be careful about how and when we +# snap the provided intervals and offsets to exact axis values (and indices). +# +# To avoid the fencepost problems and to define the axes, we convert the +# interval to a UnitRange of relative indices and the array of offsets to an +# array of absolute indices (independently of each other). Exactly how we do so +# must be carefully considered. +# +# Note that this is fundamentally different than indexing by a single interval; +# whereas those intervals are specified in the same units as the elements of the +# axis itself, these intervals are specified in terms of _offsets_. At the same +# time, we want `A[interval] == vec(A[interval + [0]])`. To make these +# computations as similar as possible, we use a phony range of the form +# `step(ax):step(ax):step(ax)` in order to search for the interval. +phony_range(r::Range) = step(r):step(r):step(r) +phony_range(r::AbstractUnitRange) = step(r):step(r) +if VERSION < v"0.6-pre" + phony_range(r::FloatRange) = FloatRange(r.step, r.step, one(r.len), r.divisor) +else + phony_range(r::StepRangeLen) = StepRangeLen(r.step, r.step, 1) +end +function relativewindow(r::Range, x::ClosedInterval) + pr = phony_range(r) + idxs = Extrapolated.searchsorted(pr, x) + vals = Extrapolated.getindex(pr, idxs) + return (idxs, vals) +end + axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) - n = length(idx.offsets) - idxs = unsafe_searchsorted(ax, idx.window) - offsets = [searchsortednearest(ax, idx.offsets[i]) for i=1:n] - AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[offsets])) + idxs, vals = relativewindow(ax, idx.window) + offsets = [Extrapolated.searchsortednearest(ax, offset) for offset in idx.offsets] + AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(Extrapolated.getindex(ax, offsets))) end # We also have special datatypes to represent intervals about indices axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::IntervalAtIndex) = searchsorted(ax, idx.window + ax[idx.index]) function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex) - idxs = unsafe_searchsorted(ax, idx.window) - AxisArray(idxs + idx.index, Axis{:sub}(inbounds_getindex(ax, idxs))) + idxs, vals = relativewindow(ax, idx.window) + AxisArray(idxs + idx.index, Axis{:sub}(vals)) end axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes) - n = length(idx.indexes) - idxs = unsafe_searchsorted(ax, idx.window) - AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[idx.indexes])) + idxs, vals = relativewindow(ax, idx.window) + AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(vals), Axis{:rep}(ax[idx.indexes])) end # Categorical axes may be indexed by their elements diff --git a/src/search.jl b/src/search.jl index a47ccb2..056a429 100644 --- a/src/search.jl +++ b/src/search.jl @@ -15,78 +15,92 @@ function searchsortednearest(vec::AbstractVector, x) end return idx end +# Base only specializes searching ranges by Numbers; so optimize for Intervals +function Base.searchsorted(a::Range, I::ClosedInterval) + searchsortedfirst(a, I.left):searchsortedlast(a, I.right) +end -# We depend upon extrapolative behaviors in searching ranges to shift axes. -# This can be done by stealing Base's implementations and removing the bounds- -# correcting min/max. +""" +The internal `Extrapolated` module contains implementations for indexing and +searching into ranges beyond their bounds. The `@inbounds` macro is not +sufficient since it can be turned off by `--check-bounds=yes`. +""" +module Extrapolated +using ..ClosedInterval + +function searchsortednearest(vec::Range, x) + idx = searchsortedfirst(vec, x) # Returns the first idx | vec[idx] >= x + if (getindex(vec, idx) - x) > (x - getindex(vec, idx-1)) + idx -= 1 # The previous element is closer + end + return idx +end -# TODO: This could plug into the sorting system better, but it's fine for now -# TODO: This needs to support Dates. """ - unsafe_searchsorted(a::Range, I::ClosedInterval) + searchsorted(a::Range, I::ClosedInterval) Return the indices of the range that fall within an interval without checking bounds, possibly extrapolating outside the range if needed. """ -function unsafe_searchsorted(a::Range, I::ClosedInterval) - unsafe_searchsortedfirst(a, I.left):unsafe_searchsortedlast(a, I.right) -end -# Base only specializes searching ranges by Numbers; so optimize for Intervals -function Base.searchsorted(a::Range, I::ClosedInterval) +function searchsorted(a::Range, I::ClosedInterval) searchsortedfirst(a, I.left):searchsortedlast(a, I.right) end -# When running with "--check-bounds=yes" (like on Travis), the bounds-check isn't elided -@inline function inbounds_getindex{T}(v::Range{T}, i::Integer) +# When running with "--check-bounds=yes`(like on Travis), the bounds-check isn't elided +@inline function getindex{T}(v::Range{T}, i::Integer) convert(T, first(v) + (i-1)*step(v)) end -@inline function inbounds_getindex{T<:Integer}(r::Range, s::Range{T}) +@inline function getindex{T<:Integer}(r::Range, s::Range{T}) f = first(r) st = oftype(f, f + (first(s)-1)*step(r)) range(st, step(r)*step(s), length(s)) end +getindex(r::Range, I::Array) = [getindex(r, i) for i in I] if VERSION < v"0.6.0-dev.2390" - include_string(""" - @inline function inbounds_getindex{T}(r::FloatRange{T}, i::Integer) + @inline function getindex{T}(r::FloatRange{T}, i::Integer) convert(T, (r.start + (i-1)*r.step)/r.divisor) end - @inline function inbounds_getindex(r::FloatRange, s::OrdinalRange) + @inline function getindex(r::FloatRange, s::OrdinalRange) FloatRange(r.start + (first(s)-1)*r.step, step(s)*r.step, length(s), r.divisor) end - """) else - include_string(""" - @inline inbounds_getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i) - @inline function inbounds_getindex(r::StepRangeLen, s::OrdinalRange) - vfirst = Base.unsafe_getindex(r, first(s)) - StepRangeLen(vfirst, step(r)*step(s), length(s)) + @inline getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i) + @inline function getindex(r::StepRangeLen, s::AbstractUnitRange) + soffset = 1 + (r.offset - first(s)) + soffset = clamp(soffset, 1, length(s)) + ioffset = first(s) + (soffset-1) + if ioffset == r.offset + StepRangeLen(r.ref, r.step, length(s), max(1,soffset)) + else + StepRangeLen(r.ref + (ioffset-r.offset)*r.step, r.step, length(s), max(1,soffset)) + end end - """) end -function unsafe_searchsortedlast{T<:Number}(a::Range{T}, x::Number) +function searchsortedlast(a::Range, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) n = round(Integer,(x-first(a))/step(a))+1 - isless(x, inbounds_getindex(a, n)) ? n-1 : n + isless(x, getindex(a, n)) ? n-1 : n end -function unsafe_searchsortedfirst{T<:Number}(a::Range{T}, x::Number) +function searchsortedfirst(a::Range, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) n = round(Integer,(x-first(a))/step(a))+1 - isless(inbounds_getindex(a, n), x) ? n+1 : n + isless(getindex(a, n), x) ? n+1 : n end -function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Number) +function searchsortedlast{T<:Integer}(a::Range{T}, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) fld(floor(Integer,x)-first(a),step(a))+1 end -function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Number) +function searchsortedfirst{T<:Integer}(a::Range{T}, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) -fld(floor(Integer,-x)+first(a),step(a))+1 end -function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned) +function searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) -fld(first(a)-signed(x),step(a))+1 end -function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned) +function searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) fld(signed(x)-first(a),step(a))+1 end +end \ No newline at end of file diff --git a/test/indexing.jl b/test/indexing.jl index 00183d8..cca1748 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -70,6 +70,19 @@ B = AxisArray(reshape(1:15, 5,3), .1:.1:0.5, [:a, :b, :c]) @test B[Axis{:row}(ClosedInterval(0.15, 0.3))] == @view(B[Axis{:row}(ClosedInterval(0.15, 0.3))]) == B[2:3,:] +# Test indexing by Intervals that aren't of the form step:step:last +B = AxisArray(reshape(1:15, 5,3), 1.1:0.1:1.5, [:a, :b, :c]) +@test B[ClosedInterval(1.0, 1.5), :] == B[ClosedInterval(1.0, 1.5)] == B[:,:] +@test B[ClosedInterval(1.0, 1.3), :] == B[ClosedInterval(1.0, 1.3)] == B[1:3,:] +@test B[ClosedInterval(1.15, 1.3), :] == B[ClosedInterval(1.15, 1.3)] == B[2:3,:] +@test B[ClosedInterval(1.2, 1.5), :] == B[ClosedInterval(1.2, 1.5)] == B[2:end,:] +@test B[ClosedInterval(1.2, 1.6), :] == B[ClosedInterval(1.2, 1.6)] == B[2:end,:] +@test @view(B[ClosedInterval(1.0, 1.5), :]) == @view(B[ClosedInterval(1.0, 1.5)]) == B[:,:] +@test @view(B[ClosedInterval(1.0, 1.3), :]) == @view(B[ClosedInterval(1.0, 1.3)]) == B[1:3,:] +@test @view(B[ClosedInterval(1.15, 1.3), :]) == @view(B[ClosedInterval(1.15, 1.3)]) == B[2:3,:] +@test @view(B[ClosedInterval(1.2, 1.5), :]) == @view(B[ClosedInterval(1.2, 1.5)]) == B[2:end,:] +@test @view(B[ClosedInterval(1.2, 1.6), :]) == @view(B[ClosedInterval(1.2, 1.6)]) == B[2:end,:] + A = AxisArray(reshape(1:256, 4,4,4,4), Axis{:d1}(.1:.1:.4), Axis{:d2}(1//10:1//10:4//10), Axis{:d3}(["1","2","3","4"]), Axis{:d4}([:a, :b, :c, :d])) ax1 = axes(A)[1] @test A[Axis{:d1}(2)] == A[ax1(2)] @@ -115,6 +128,7 @@ AxisArrays.axistrait(::AbstractVector{IntLike}) = AxisArrays.Dimensional end for (r, Irel) in ((0.1:0.1:10.0, -0.5..0.5), # FloatRange + (22.1:0.1:32.0, -0.5..0.5), (linspace(0.1, 10.0, 100), -0.51..0.51), # LinSpace (IL.IntLike(1):IL.IntLike(1):IL.IntLike(100), IL.IntLike(-5)..IL.IntLike(5))) # StepRange @@ -165,3 +179,11 @@ A = AxisArray(rand(2,2), :x, :y) acc = zeros(Int, 4, 1, 2) Base.mapreducedim!(x->x>5, +, acc, A3) @test acc == reshape([1 3; 2 3; 2 3; 2 3], 4, 1, 2) + +# Test using dates +using Base.Dates: Day, Month +A = AxisArray(1:365, Date(2017,1,1):Date(2017,12,31)) +@test A[Date(2017,2,1) .. Date(2017,2,28)] == collect(31 + (1:28)) # February +@test A[(-Day(13)..Day(14)) + Date(2017,2,14)] == collect(31 + (1:28)) +@test A[(-Day(14)..Day(14)) + DateTime(2017,2,14,12)] == collect(31 + (1:28)) +@test A[(Day(0)..Day(6)) + (Date(2017,1,1):Month(1):Date(2017,4,12))] == [1:7 32:38 60:66 91:97] diff --git a/test/search.jl b/test/search.jl index 0fa0020..c37a192 100644 --- a/test/search.jl +++ b/test/search.jl @@ -10,25 +10,25 @@ import AxisArrays: searchsortednearest @test searchsortednearest([1,1,2,2,3,3], Inf) === 6 @test searchsortednearest([1,1,2,2,3,3], -Inf) === 1 -# unsafe searchsorted for ranges -import AxisArrays: unsafe_searchsorted -@test unsafe_searchsorted(1:10, -1 .. 1) === -1:1 -@test unsafe_searchsorted(1:10, 12 .. 15) === 12:15 -@test unsafe_searchsorted(0:2:10, -3 .. -1) === 0:0 -@test unsafe_searchsorted(0:2:10, -5 .. 3) === -1:2 +# Extrapolated searching for ranges +import AxisArrays: Extrapolated +@test Extrapolated.searchsorted(1:10, -1 .. 1) === -1:1 +@test Extrapolated.searchsorted(1:10, 12 .. 15) === 12:15 +@test Extrapolated.searchsorted(0:2:10, -3 .. -1) === 0:0 +@test Extrapolated.searchsorted(0:2:10, -5 .. 3) === -1:2 -@test unsafe_searchsorted(1:2, 4.5 .. 4.5) === 5:4 -@test unsafe_searchsorted(1:2, 3.5 .. 3.5) === 4:3 -@test unsafe_searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5) -@test unsafe_searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5) -@test unsafe_searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5) -@test unsafe_searchsorted(1:2, -0.5 .. -0.5) === 0:-1 -@test unsafe_searchsorted(1:2, -1.5 .. -1.5) === -1:-2 +@test Extrapolated.searchsorted(1:2, 4.5 .. 4.5) === 5:4 +@test Extrapolated.searchsorted(1:2, 3.5 .. 3.5) === 4:3 +@test Extrapolated.searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5) +@test Extrapolated.searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5) +@test Extrapolated.searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5) +@test Extrapolated.searchsorted(1:2, -0.5 .. -0.5) === 0:-1 +@test Extrapolated.searchsorted(1:2, -1.5 .. -1.5) === -1:-2 -@test unsafe_searchsorted(2:2:4, 0x6 .. 0x6) === 3:3 -@test unsafe_searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2 -@test unsafe_searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2 -@test unsafe_searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1 -@test unsafe_searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1 -@test unsafe_searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0 -@test unsafe_searchsorted(2:2:4, 0x0 .. 0x0) === 0:0 +@test Extrapolated.searchsorted(2:2:4, 0x6 .. 0x6) === 3:3 +@test Extrapolated.searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2 +@test Extrapolated.searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2 +@test Extrapolated.searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1 +@test Extrapolated.searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1 +@test Extrapolated.searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0 +@test Extrapolated.searchsorted(2:2:4, 0x0 .. 0x0) === 0:0