From c6e03db86a5a1711118e0b2dee4f14a74a863082 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Thu, 8 Jun 2017 03:02:35 -0500 Subject: [PATCH 1/7] Fix interval indexing with offset axes This one is terrifying. We were only testing against axes of the form `step:step:last`. Requires https://github.com/ajkeller34/Unitful.jl/pull/90. --- src/indexing.jl | 14 +++++++------- test/indexing.jl | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 5d1b22a..2324ca2 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -186,28 +186,28 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval) # there will be a non-constant number of indices in each repetition. # Two small tricks are used here: # * Compute the resulting interval axis with unsafe indexing without any offset -# - Since it's a range, we can do this, and it makes the resulting axis useful +# - Simply divide by the step to get indices relative to zero # * Snap the offsets to the nearest datapoint to avoid fencepost problems # Adds a dimension to the result; rows represent the interval and columns are offsets. axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) n = length(idx.offsets) - idxs = unsafe_searchsorted(ax, idx.window) + idxs = ceil(Integer, idx.window.left/step(ax)):floor(Integer, idx.window.right/step(ax)) offsets = [searchsortednearest(ax, idx.offsets[i]) for i=1:n] - AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[offsets])) + AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(idxs*step(ax)), Axis{:rep}(ax[offsets])) end # We also have special datatypes to represent intervals about indices axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::IntervalAtIndex) = searchsorted(ax, idx.window + ax[idx.index]) function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex) - idxs = unsafe_searchsorted(ax, idx.window) - AxisArray(idxs + idx.index, Axis{:sub}(inbounds_getindex(ax, idxs))) + idxs = ceil(Integer, idx.window.left/step(ax)):floor(Integer, idx.window.right/step(ax)) + AxisArray(idxs + idx.index, Axis{:sub}(idxs*step(ax))) end axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes) n = length(idx.indexes) - idxs = unsafe_searchsorted(ax, idx.window) - AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(inbounds_getindex(ax, idxs)), Axis{:rep}(ax[idx.indexes])) + idxs = ceil(Integer, idx.window.left/step(ax)):floor(Integer, idx.window.right/step(ax)) + AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(idxs*step(ax)), Axis{:rep}(ax[idx.indexes])) end # Categorical axes may be indexed by their elements diff --git a/test/indexing.jl b/test/indexing.jl index 00183d8..ae1f240 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -70,6 +70,19 @@ B = AxisArray(reshape(1:15, 5,3), .1:.1:0.5, [:a, :b, :c]) @test B[Axis{:row}(ClosedInterval(0.15, 0.3))] == @view(B[Axis{:row}(ClosedInterval(0.15, 0.3))]) == B[2:3,:] +# Test indexing by Intervals that aren't of the form step:step:last +B = AxisArray(reshape(1:15, 5,3), 1.1:0.1:1.5, [:a, :b, :c]) +@test B[ClosedInterval(1.0, 1.5), :] == B[ClosedInterval(1.0, 1.5)] == B[:,:] +@test B[ClosedInterval(1.0, 1.3), :] == B[ClosedInterval(1.0, 1.3)] == B[1:3,:] +@test B[ClosedInterval(1.15, 1.3), :] == B[ClosedInterval(1.15, 1.3)] == B[2:3,:] +@test B[ClosedInterval(1.2, 1.5), :] == B[ClosedInterval(1.2, 1.5)] == B[2:end,:] +@test B[ClosedInterval(1.2, 1.6), :] == B[ClosedInterval(1.2, 1.6)] == B[2:end,:] +@test @view(B[ClosedInterval(1.0, 1.5), :]) == @view(B[ClosedInterval(1.0, 1.5)]) == B[:,:] +@test @view(B[ClosedInterval(1.0, 1.3), :]) == @view(B[ClosedInterval(1.0, 1.3)]) == B[1:3,:] +@test @view(B[ClosedInterval(1.15, 1.3), :]) == @view(B[ClosedInterval(1.15, 1.3)]) == B[2:3,:] +@test @view(B[ClosedInterval(1.2, 1.5), :]) == @view(B[ClosedInterval(1.2, 1.5)]) == B[2:end,:] +@test @view(B[ClosedInterval(1.2, 1.6), :]) == @view(B[ClosedInterval(1.2, 1.6)]) == B[2:end,:] + A = AxisArray(reshape(1:256, 4,4,4,4), Axis{:d1}(.1:.1:.4), Axis{:d2}(1//10:1//10:4//10), Axis{:d3}(["1","2","3","4"]), Axis{:d4}([:a, :b, :c, :d])) ax1 = axes(A)[1] @test A[Axis{:d1}(2)] == A[ax1(2)] @@ -115,6 +128,7 @@ AxisArrays.axistrait(::AbstractVector{IntLike}) = AxisArrays.Dimensional end for (r, Irel) in ((0.1:0.1:10.0, -0.5..0.5), # FloatRange + (22.1:0.1:32.0, -0.5..0.5), (linspace(0.1, 10.0, 100), -0.51..0.51), # LinSpace (IL.IntLike(1):IL.IntLike(1):IL.IntLike(100), IL.IntLike(-5)..IL.IntLike(5))) # StepRange From f5eeb1d21252f4f212dacddf05277bbd85d23edc Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 10 Jun 2017 18:53:50 -0500 Subject: [PATCH 2/7] Simpler approach with relativesearchsorted --- src/indexing.jl | 36 +++++++++++++++++++++++------------- src/search.jl | 31 ++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 2324ca2..33ba1c7 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -184,30 +184,40 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval) # Or repeated intervals, which only work if the axis is a range since otherwise # there will be a non-constant number of indices in each repetition. -# Two small tricks are used here: -# * Compute the resulting interval axis with unsafe indexing without any offset -# - Simply divide by the step to get indices relative to zero -# * Snap the offsets to the nearest datapoint to avoid fencepost problems -# Adds a dimension to the result; rows represent the interval and columns are offsets. +# +# There are a few challenges here: +# * This operation adds a dimension to the result; rows represent the interval +# (or subset) and columns are offsets (or repetition). A RepeatedRangeMatrix +# represents the resulting matrix of indices very nicely. +# * We also want the returned matrix to keep track of its axes; the axis +# subset (ax_sub) is the relative location of the interval with respect to +# each offset, and the repetitions (ax_rep) is the array of offsets. +# +# The hard part is that it is possible (and likely!) that neither the interval +# endpoints nor the offsets fall exactly upon an axis value. Or even worse: the +# some offsets when added to the interval could span more elements than others +# (the fencepost problem). As such, we need to be careful about how and when we +# snap the provided intervals and offsets to exact axis values (and indices). +# +# axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) - n = length(idx.offsets) - idxs = ceil(Integer, idx.window.left/step(ax)):floor(Integer, idx.window.right/step(ax)) - offsets = [searchsortednearest(ax, idx.offsets[i]) for i=1:n] - AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(idxs*step(ax)), Axis{:rep}(ax[offsets])) + idxs, vals = relativesearchsorted(ax, idx.window) + offsets = [searchsortednearest(ax, offset) for offset in idx.offsets] + AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(ax[offsets])) end # We also have special datatypes to represent intervals about indices axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::IntervalAtIndex) = searchsorted(ax, idx.window + ax[idx.index]) function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex) - idxs = ceil(Integer, idx.window.left/step(ax)):floor(Integer, idx.window.right/step(ax)) - AxisArray(idxs + idx.index, Axis{:sub}(idxs*step(ax))) + idxs, vals = relativesearchsorted(ax, idx.window) + AxisArray(idxs + idx.index, Axis{:sub}(vals)) end axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes) n = length(idx.indexes) - idxs = ceil(Integer, idx.window.left/step(ax)):floor(Integer, idx.window.right/step(ax)) - AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(idxs*step(ax)), Axis{:rep}(ax[idx.indexes])) + idxs, vals = relativesearchsorted(ax, idx.window) + AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(vals), Axis{:rep}(ax[idx.indexes])) end # Categorical axes may be indexed by their elements diff --git a/src/search.jl b/src/search.jl index a47ccb2..ee94787 100644 --- a/src/search.jl +++ b/src/search.jl @@ -16,6 +16,25 @@ function searchsortednearest(vec::AbstractVector, x) return idx end +function unsafe_searchsortednearest(vec::Range, x) + idx = unsafe_searchsortedfirst(vec, x) # Returns the first idx | vec[idx] >= x + if (inbounds_getindex(vec, idx) - x) > (x - inbounds_getindex(vec, idx-1)) + idx -= 1 # The previous element is closer + end + return idx +end + +""" + relativesearchsorted(r::Range, x) + +Returns a tuple of indices and values that represent how the value `x` is offset +from zero for the range `r`. +""" +function relativesearchsorted(r::Range, x) + idxs = unsafe_searchsorted(r, x) + vals = inbounds_getindex(r, idxs) + return (idxs - unsafe_searchsortednearest(r, 0), vals) +end # We depend upon extrapolative behaviors in searching ranges to shift axes. # This can be done by stealing Base's implementations and removing the bounds- # correcting min/max. @@ -57,9 +76,15 @@ if VERSION < v"0.6.0-dev.2390" else include_string(""" @inline inbounds_getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i) - @inline function inbounds_getindex(r::StepRangeLen, s::OrdinalRange) - vfirst = Base.unsafe_getindex(r, first(s)) - StepRangeLen(vfirst, step(r)*step(s), length(s)) + @inline function inbounds_getindex(r::StepRangeLen, s::AbstractUnitRange) + soffset = 1 + (r.offset - first(s)) + soffset = clamp(soffset, 1, length(s)) + ioffset = first(s) + (soffset-1) + if ioffset == r.offset + StepRangeLen(r.ref, r.step, length(s), max(1,soffset)) + else + StepRangeLen(r.ref + (ioffset-r.offset)*r.step, r.step, length(s), max(1,soffset)) + end end """) end From 5c653c18d1c31ce4b12fa263e8350e6a5b55e5e2 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 10 Jun 2017 19:20:20 -0500 Subject: [PATCH 3/7] Try using nsteps instead --- src/indexing.jl | 6 +++--- src/search.jl | 23 ++++++++++++++++++----- test/indexing.jl | 1 + 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 33ba1c7..915ecc5 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -202,7 +202,7 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval) # axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) - idxs, vals = relativesearchsorted(ax, idx.window) + idxs, vals = relativewindow(ax, idx.window) offsets = [searchsortednearest(ax, offset) for offset in idx.offsets] AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(ax[offsets])) end @@ -210,13 +210,13 @@ end # We also have special datatypes to represent intervals about indices axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::IntervalAtIndex) = searchsorted(ax, idx.window + ax[idx.index]) function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex) - idxs, vals = relativesearchsorted(ax, idx.window) + idxs, vals = relativewindow(ax, idx.window) AxisArray(idxs + idx.index, Axis{:sub}(vals)) end axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes) n = length(idx.indexes) - idxs, vals = relativesearchsorted(ax, idx.window) + idxs, vals = relativewindow(ax, idx.window) AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(vals), Axis{:rep}(ax[idx.indexes])) end diff --git a/src/search.jl b/src/search.jl index ee94787..d4dc603 100644 --- a/src/search.jl +++ b/src/search.jl @@ -24,16 +24,29 @@ function unsafe_searchsortednearest(vec::Range, x) return idx end + +nsteps(x, step) = floor(Int, abs(x / step)) * Int(sign(x)) +function nsteps{T}(x, step::Base.TwicePrecision{T}) + # this is basically a hack because Base hasn't defined x/step at TwicePrecision resolution + nf = abs(x / convert(T, step)) + nc = ceil(Int, nf) + return (abs(convert(T, nc*step)) <= abs(x) ? nc : floor(Int, nf)) * Int(sign(x)) +end + +_step(r::Range) = step(r) +_step(r::StepRangeLen) = r.step + """ - relativesearchsorted(r::Range, x) + relativewindow(r::Range, x::ClosedInterval) Returns a tuple of indices and values that represent how the value `x` is offset from zero for the range `r`. """ -function relativesearchsorted(r::Range, x) - idxs = unsafe_searchsorted(r, x) - vals = inbounds_getindex(r, idxs) - return (idxs - unsafe_searchsortednearest(r, 0), vals) +function relativewindow(r::Range, x::ClosedInterval) + s = _step(r) + idxs = nsteps(x.left, s):nsteps(x.right, s) + vals = StepRangeLen(idxs[1]*s, s, length(idxs)) + return (idxs, vals) end # We depend upon extrapolative behaviors in searching ranges to shift axes. # This can be done by stealing Base's implementations and removing the bounds- diff --git a/test/indexing.jl b/test/indexing.jl index ae1f240..3a6e481 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -121,6 +121,7 @@ Base.div(x::IntLike, y::IntLike) = div(x.val, y.val) Base.:*(x::IntLike, y::Int) = IntLike(x.val * y) Base.:*(x::Int, y::IntLike) = y*x Base.:/(x::IntLike, y::Int) = IntLike(x.val / y) +Base.abs(x::IntLike) = IntLike(abs(x.val)) Base.promote_rule(::Type{IntLike}, ::Type{Int}) = Int Base.convert(::Type{Int}, x::IntLike) = x.val using AxisArrays From 925958b92e711104f4fbe413af65f55bf3b7b104 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 10 Jun 2017 20:46:16 -0500 Subject: [PATCH 4/7] add support for Dates --- src/indexing.jl | 6 +++++- src/search.jl | 8 ++++++-- test/indexing.jl | 9 ++++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 915ecc5..0cfa96e 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -199,7 +199,11 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval) # (the fencepost problem). As such, we need to be careful about how and when we # snap the provided intervals and offsets to exact axis values (and indices). # -# +# Note that this is fundamentally different than indexing by a single interval; +# whereas those intervals are specified in the same units as the elements of the +# axis itself, repeated intervals are specified in terms of _offsets_. This is +# most obvious with dates; single intervals are between dates, repeated +# intervals use intervals of days (for example) and offsets of dates. axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) idxs, vals = relativewindow(ax, idx.window) diff --git a/src/search.jl b/src/search.jl index d4dc603..ea79a92 100644 --- a/src/search.jl +++ b/src/search.jl @@ -25,12 +25,16 @@ function unsafe_searchsortednearest(vec::Range, x) end -nsteps(x, step) = floor(Int, abs(x / step)) * Int(sign(x)) +function nsteps(x, step) + offset = floor(Int, abs(x / step)) + return x < zero(x) ? -offset : offset +end function nsteps{T}(x, step::Base.TwicePrecision{T}) # this is basically a hack because Base hasn't defined x/step at TwicePrecision resolution nf = abs(x / convert(T, step)) nc = ceil(Int, nf) - return (abs(convert(T, nc*step)) <= abs(x) ? nc : floor(Int, nf)) * Int(sign(x)) + offset = (abs(convert(T, nc*step)) <= abs(x) ? nc : floor(Int, nf)) + return x < zero(x) ? -offset : offset end _step(r::Range) = step(r) diff --git a/test/indexing.jl b/test/indexing.jl index 3a6e481..cca1748 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -121,7 +121,6 @@ Base.div(x::IntLike, y::IntLike) = div(x.val, y.val) Base.:*(x::IntLike, y::Int) = IntLike(x.val * y) Base.:*(x::Int, y::IntLike) = y*x Base.:/(x::IntLike, y::Int) = IntLike(x.val / y) -Base.abs(x::IntLike) = IntLike(abs(x.val)) Base.promote_rule(::Type{IntLike}, ::Type{Int}) = Int Base.convert(::Type{Int}, x::IntLike) = x.val using AxisArrays @@ -180,3 +179,11 @@ A = AxisArray(rand(2,2), :x, :y) acc = zeros(Int, 4, 1, 2) Base.mapreducedim!(x->x>5, +, acc, A3) @test acc == reshape([1 3; 2 3; 2 3; 2 3], 4, 1, 2) + +# Test using dates +using Base.Dates: Day, Month +A = AxisArray(1:365, Date(2017,1,1):Date(2017,12,31)) +@test A[Date(2017,2,1) .. Date(2017,2,28)] == collect(31 + (1:28)) # February +@test A[(-Day(13)..Day(14)) + Date(2017,2,14)] == collect(31 + (1:28)) +@test A[(-Day(14)..Day(14)) + DateTime(2017,2,14,12)] == collect(31 + (1:28)) +@test A[(Day(0)..Day(6)) + (Date(2017,1,1):Month(1):Date(2017,4,12))] == [1:7 32:38 60:66 91:97] From d080469d997057f088f3f06c0d87d25cc5db01e4 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sat, 10 Jun 2017 22:59:22 -0500 Subject: [PATCH 5/7] WIP: add div/mul for TwicePrecision? --- src/search.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/search.jl b/src/search.jl index ea79a92..4872a7e 100644 --- a/src/search.jl +++ b/src/search.jl @@ -24,6 +24,20 @@ function unsafe_searchsortednearest(vec::Range, x) return idx end +# Dekker div2 +import Base: TwicePrecision, splitprec +function Base.inv(y::TwicePrecision) + c = inv(y.hi) + chh, clo = splitprec(c) + u = TwicePrecision(chi, clo) * y.hi + cc = (((1 - u.hi) - u.lo) - c*y.lo)/y.hi + TwicePrecision(c, cc) +end +function *{T}(x::TwicePrecision{T}, y::TwicePrecision{T}) + c = TwicePrecision(splitprec(x.hi)...) * y.hi + cc = (x.hi * y.lo + x.lo* y.hi) + c.lo + TwicePrecision(c.hi, cc) +end function nsteps(x, step) offset = floor(Int, abs(x / step)) From 0fa4cb739de551a0965caf2fe7d9b0c48bccedef Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sun, 11 Jun 2017 00:02:33 -0500 Subject: [PATCH 6/7] use a phony range instead for 0.5 support --- src/indexing.jl | 47 ++++++++++++++++++++++++++++++----------- src/search.jl | 56 +++++-------------------------------------------- 2 files changed, 40 insertions(+), 63 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index 0cfa96e..f5f7f2f 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -185,30 +185,54 @@ axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::ClosedInterval) # Or repeated intervals, which only work if the axis is a range since otherwise # there will be a non-constant number of indices in each repetition. # -# There are a few challenges here: +# There are a number of challenges here: # * This operation adds a dimension to the result; rows represent the interval # (or subset) and columns are offsets (or repetition). A RepeatedRangeMatrix # represents the resulting matrix of indices very nicely. # * We also want the returned matrix to keep track of its axes; the axis # subset (ax_sub) is the relative location of the interval with respect to # each offset, and the repetitions (ax_rep) is the array of offsets. +# * We are interested in the resulting *addition* of the interval against the +# offsets. Either the offsets or the interval may independently be out of +# bounds prior to this addition. Even worse: the interval may have different +# units than the axis (e.g., `(Day(-1)..Day(1)) + dates` for a three-day +# span around dates of interest over a Date axis). +# * It is possible (and likely!) that neither the interval endpoints nor the +# offsets fall exactly upon an axis value. Or even worse: the some offsets +# when added to the interval could span more elements than others (the +# fencepost problem). As such, we need to be careful about how and when we +# snap the provided intervals and offsets to exact axis values (and indices). # -# The hard part is that it is possible (and likely!) that neither the interval -# endpoints nor the offsets fall exactly upon an axis value. Or even worse: the -# some offsets when added to the interval could span more elements than others -# (the fencepost problem). As such, we need to be careful about how and when we -# snap the provided intervals and offsets to exact axis values (and indices). +# To avoid the fencepost problems and to define the axes, we convert the +# interval to a UnitRange of relative indices and the array of offsets to an +# array of absolute indices (independently of each other). Exactly how we do so +# must be carefully considered. # # Note that this is fundamentally different than indexing by a single interval; # whereas those intervals are specified in the same units as the elements of the -# axis itself, repeated intervals are specified in terms of _offsets_. This is -# most obvious with dates; single intervals are between dates, repeated -# intervals use intervals of days (for example) and offsets of dates. +# axis itself, these intervals are specified in terms of _offsets_. At the same +# time, we want `A[interval] == vec(A[interval + [0]])`. To make these +# computations as similar as possible, we use a phony range of the form +# `step(ax):step(ax):step(ax)` in order to search for the interval. +phony_range(r::Range) = step(r):step(r):step(r) +phony_range(r::AbstractUnitRange) = step(r):step(r) +if VERSION < v"0.6-pre" + phony_range(r::FloatRange) = FloatRange(r.step, r.step, one(r.len), r.divisor) +else + phony_range(r::StepRangeLen) = StepRangeLen(r.step, r.step, 1) +end +function relativewindow(r::Range, x::ClosedInterval) + pr = phony_range(r) + idxs = unsafe_searchsorted(pr, x) + vals = inbounds_getindex(pr, idxs) + return (idxs, vals) +end + axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) idxs, vals = relativewindow(ax, idx.window) - offsets = [searchsortednearest(ax, offset) for offset in idx.offsets] - AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(ax[offsets])) + offsets = [unsafe_searchsortednearest(ax, offset) for offset in idx.offsets] + AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(inbounds_getindex(ax, offsets))) end # We also have special datatypes to represent intervals about indices @@ -219,7 +243,6 @@ function axisindexes(::Type{Dimensional}, ax::Range, idx::IntervalAtIndex) end axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedIntervalAtIndexes) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedIntervalAtIndexes) - n = length(idx.indexes) idxs, vals = relativewindow(ax, idx.window) AxisArray(RepeatedRangeMatrix(idxs, idx.indexes), Axis{:sub}(vals), Axis{:rep}(ax[idx.indexes])) end diff --git a/src/search.jl b/src/search.jl index 4872a7e..7a5ca54 100644 --- a/src/search.jl +++ b/src/search.jl @@ -24,54 +24,11 @@ function unsafe_searchsortednearest(vec::Range, x) return idx end -# Dekker div2 -import Base: TwicePrecision, splitprec -function Base.inv(y::TwicePrecision) - c = inv(y.hi) - chh, clo = splitprec(c) - u = TwicePrecision(chi, clo) * y.hi - cc = (((1 - u.hi) - u.lo) - c*y.lo)/y.hi - TwicePrecision(c, cc) -end -function *{T}(x::TwicePrecision{T}, y::TwicePrecision{T}) - c = TwicePrecision(splitprec(x.hi)...) * y.hi - cc = (x.hi * y.lo + x.lo* y.hi) + c.lo - TwicePrecision(c.hi, cc) -end - -function nsteps(x, step) - offset = floor(Int, abs(x / step)) - return x < zero(x) ? -offset : offset -end -function nsteps{T}(x, step::Base.TwicePrecision{T}) - # this is basically a hack because Base hasn't defined x/step at TwicePrecision resolution - nf = abs(x / convert(T, step)) - nc = ceil(Int, nf) - offset = (abs(convert(T, nc*step)) <= abs(x) ? nc : floor(Int, nf)) - return x < zero(x) ? -offset : offset -end - -_step(r::Range) = step(r) -_step(r::StepRangeLen) = r.step - -""" - relativewindow(r::Range, x::ClosedInterval) - -Returns a tuple of indices and values that represent how the value `x` is offset -from zero for the range `r`. -""" -function relativewindow(r::Range, x::ClosedInterval) - s = _step(r) - idxs = nsteps(x.left, s):nsteps(x.right, s) - vals = StepRangeLen(idxs[1]*s, s, length(idxs)) - return (idxs, vals) -end # We depend upon extrapolative behaviors in searching ranges to shift axes. # This can be done by stealing Base's implementations and removing the bounds- # correcting min/max. # TODO: This could plug into the sorting system better, but it's fine for now -# TODO: This needs to support Dates. """ unsafe_searchsorted(a::Range, I::ClosedInterval) @@ -95,17 +52,15 @@ end st = oftype(f, f + (first(s)-1)*step(r)) range(st, step(r)*step(s), length(s)) end +inbounds_getindex(r::Range, I::Array) = [inbounds_getindex(r, i) for i in I] if VERSION < v"0.6.0-dev.2390" - include_string(""" @inline function inbounds_getindex{T}(r::FloatRange{T}, i::Integer) convert(T, (r.start + (i-1)*r.step)/r.divisor) end @inline function inbounds_getindex(r::FloatRange, s::OrdinalRange) FloatRange(r.start + (first(s)-1)*r.step, step(s)*r.step, length(s), r.divisor) end - """) else - include_string(""" @inline inbounds_getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i) @inline function inbounds_getindex(r::StepRangeLen, s::AbstractUnitRange) soffset = 1 + (r.offset - first(s)) @@ -117,24 +72,23 @@ else StepRangeLen(r.ref + (ioffset-r.offset)*r.step, r.step, length(s), max(1,soffset)) end end - """) end -function unsafe_searchsortedlast{T<:Number}(a::Range{T}, x::Number) +function unsafe_searchsortedlast(a::Range, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) n = round(Integer,(x-first(a))/step(a))+1 isless(x, inbounds_getindex(a, n)) ? n-1 : n end -function unsafe_searchsortedfirst{T<:Number}(a::Range{T}, x::Number) +function unsafe_searchsortedfirst(a::Range, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) n = round(Integer,(x-first(a))/step(a))+1 isless(inbounds_getindex(a, n), x) ? n+1 : n end -function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Number) +function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) fld(floor(Integer,x)-first(a),step(a))+1 end -function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Number) +function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) -fld(floor(Integer,-x)+first(a),step(a))+1 end From 0b2eac476e21a3d7f77c7300733c77a1615d5318 Mon Sep 17 00:00:00 2001 From: Matt Bauman Date: Sun, 11 Jun 2017 00:20:00 -0500 Subject: [PATCH 7/7] Rename the unsafe/inbounds methods into an Extrapolated internal module --- src/indexing.jl | 8 +++---- src/search.jl | 64 ++++++++++++++++++++++++++----------------------- test/search.jl | 40 +++++++++++++++---------------- 3 files changed, 58 insertions(+), 54 deletions(-) diff --git a/src/indexing.jl b/src/indexing.jl index f5f7f2f..8869038 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -223,16 +223,16 @@ else end function relativewindow(r::Range, x::ClosedInterval) pr = phony_range(r) - idxs = unsafe_searchsorted(pr, x) - vals = inbounds_getindex(pr, idxs) + idxs = Extrapolated.searchsorted(pr, x) + vals = Extrapolated.getindex(pr, idxs) return (idxs, vals) end axisindexes(::Type{Dimensional}, ax::AbstractVector, idx::RepeatedInterval) = error("repeated intervals might select a varying number of elements for non-range axes; use a repeated Range of indices instead") function axisindexes(::Type{Dimensional}, ax::Range, idx::RepeatedInterval) idxs, vals = relativewindow(ax, idx.window) - offsets = [unsafe_searchsortednearest(ax, offset) for offset in idx.offsets] - AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(inbounds_getindex(ax, offsets))) + offsets = [Extrapolated.searchsortednearest(ax, offset) for offset in idx.offsets] + AxisArray(RepeatedRangeMatrix(idxs, offsets), Axis{:sub}(vals), Axis{:rep}(Extrapolated.getindex(ax, offsets))) end # We also have special datatypes to represent intervals about indices diff --git a/src/search.jl b/src/search.jl index 7a5ca54..056a429 100644 --- a/src/search.jl +++ b/src/search.jl @@ -15,54 +15,57 @@ function searchsortednearest(vec::AbstractVector, x) end return idx end +# Base only specializes searching ranges by Numbers; so optimize for Intervals +function Base.searchsorted(a::Range, I::ClosedInterval) + searchsortedfirst(a, I.left):searchsortedlast(a, I.right) +end + +""" +The internal `Extrapolated` module contains implementations for indexing and +searching into ranges beyond their bounds. The `@inbounds` macro is not +sufficient since it can be turned off by `--check-bounds=yes`. +""" +module Extrapolated +using ..ClosedInterval -function unsafe_searchsortednearest(vec::Range, x) - idx = unsafe_searchsortedfirst(vec, x) # Returns the first idx | vec[idx] >= x - if (inbounds_getindex(vec, idx) - x) > (x - inbounds_getindex(vec, idx-1)) +function searchsortednearest(vec::Range, x) + idx = searchsortedfirst(vec, x) # Returns the first idx | vec[idx] >= x + if (getindex(vec, idx) - x) > (x - getindex(vec, idx-1)) idx -= 1 # The previous element is closer end return idx end -# We depend upon extrapolative behaviors in searching ranges to shift axes. -# This can be done by stealing Base's implementations and removing the bounds- -# correcting min/max. - -# TODO: This could plug into the sorting system better, but it's fine for now """ - unsafe_searchsorted(a::Range, I::ClosedInterval) + searchsorted(a::Range, I::ClosedInterval) Return the indices of the range that fall within an interval without checking bounds, possibly extrapolating outside the range if needed. """ -function unsafe_searchsorted(a::Range, I::ClosedInterval) - unsafe_searchsortedfirst(a, I.left):unsafe_searchsortedlast(a, I.right) -end -# Base only specializes searching ranges by Numbers; so optimize for Intervals -function Base.searchsorted(a::Range, I::ClosedInterval) +function searchsorted(a::Range, I::ClosedInterval) searchsortedfirst(a, I.left):searchsortedlast(a, I.right) end -# When running with "--check-bounds=yes" (like on Travis), the bounds-check isn't elided -@inline function inbounds_getindex{T}(v::Range{T}, i::Integer) +# When running with "--check-bounds=yes`(like on Travis), the bounds-check isn't elided +@inline function getindex{T}(v::Range{T}, i::Integer) convert(T, first(v) + (i-1)*step(v)) end -@inline function inbounds_getindex{T<:Integer}(r::Range, s::Range{T}) +@inline function getindex{T<:Integer}(r::Range, s::Range{T}) f = first(r) st = oftype(f, f + (first(s)-1)*step(r)) range(st, step(r)*step(s), length(s)) end -inbounds_getindex(r::Range, I::Array) = [inbounds_getindex(r, i) for i in I] +getindex(r::Range, I::Array) = [getindex(r, i) for i in I] if VERSION < v"0.6.0-dev.2390" - @inline function inbounds_getindex{T}(r::FloatRange{T}, i::Integer) + @inline function getindex{T}(r::FloatRange{T}, i::Integer) convert(T, (r.start + (i-1)*r.step)/r.divisor) end - @inline function inbounds_getindex(r::FloatRange, s::OrdinalRange) + @inline function getindex(r::FloatRange, s::OrdinalRange) FloatRange(r.start + (first(s)-1)*r.step, step(s)*r.step, length(s), r.divisor) end else - @inline inbounds_getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i) - @inline function inbounds_getindex(r::StepRangeLen, s::AbstractUnitRange) + @inline getindex(r::StepRangeLen, i::Integer) = Base.unsafe_getindex(r, i) + @inline function getindex(r::StepRangeLen, s::AbstractUnitRange) soffset = 1 + (r.offset - first(s)) soffset = clamp(soffset, 1, length(s)) ioffset = first(s) + (soffset-1) @@ -74,29 +77,30 @@ else end end -function unsafe_searchsortedlast(a::Range, x) +function searchsortedlast(a::Range, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) n = round(Integer,(x-first(a))/step(a))+1 - isless(x, inbounds_getindex(a, n)) ? n-1 : n + isless(x, getindex(a, n)) ? n-1 : n end -function unsafe_searchsortedfirst(a::Range, x) +function searchsortedfirst(a::Range, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) n = round(Integer,(x-first(a))/step(a))+1 - isless(inbounds_getindex(a, n), x) ? n+1 : n + isless(getindex(a, n), x) ? n+1 : n end -function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x) +function searchsortedlast{T<:Integer}(a::Range{T}, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) fld(floor(Integer,x)-first(a),step(a))+1 end -function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x) +function searchsortedfirst{T<:Integer}(a::Range{T}, x) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) -fld(floor(Integer,-x)+first(a),step(a))+1 end -function unsafe_searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned) +function searchsortedfirst{T<:Integer}(a::Range{T}, x::Unsigned) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) -fld(first(a)-signed(x),step(a))+1 end -function unsafe_searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned) +function searchsortedlast{T<:Integer}(a::Range{T}, x::Unsigned) step(a) == 0 && throw(ArgumentError("ranges with a zero step are unsupported")) fld(signed(x)-first(a),step(a))+1 end +end \ No newline at end of file diff --git a/test/search.jl b/test/search.jl index 0fa0020..c37a192 100644 --- a/test/search.jl +++ b/test/search.jl @@ -10,25 +10,25 @@ import AxisArrays: searchsortednearest @test searchsortednearest([1,1,2,2,3,3], Inf) === 6 @test searchsortednearest([1,1,2,2,3,3], -Inf) === 1 -# unsafe searchsorted for ranges -import AxisArrays: unsafe_searchsorted -@test unsafe_searchsorted(1:10, -1 .. 1) === -1:1 -@test unsafe_searchsorted(1:10, 12 .. 15) === 12:15 -@test unsafe_searchsorted(0:2:10, -3 .. -1) === 0:0 -@test unsafe_searchsorted(0:2:10, -5 .. 3) === -1:2 +# Extrapolated searching for ranges +import AxisArrays: Extrapolated +@test Extrapolated.searchsorted(1:10, -1 .. 1) === -1:1 +@test Extrapolated.searchsorted(1:10, 12 .. 15) === 12:15 +@test Extrapolated.searchsorted(0:2:10, -3 .. -1) === 0:0 +@test Extrapolated.searchsorted(0:2:10, -5 .. 3) === -1:2 -@test unsafe_searchsorted(1:2, 4.5 .. 4.5) === 5:4 -@test unsafe_searchsorted(1:2, 3.5 .. 3.5) === 4:3 -@test unsafe_searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5) -@test unsafe_searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5) -@test unsafe_searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5) -@test unsafe_searchsorted(1:2, -0.5 .. -0.5) === 0:-1 -@test unsafe_searchsorted(1:2, -1.5 .. -1.5) === -1:-2 +@test Extrapolated.searchsorted(1:2, 4.5 .. 4.5) === 5:4 +@test Extrapolated.searchsorted(1:2, 3.5 .. 3.5) === 4:3 +@test Extrapolated.searchsorted(1:2, 2.5 .. 2.5) === 3:2 === searchsorted(1:2, 2.5 .. 2.5) +@test Extrapolated.searchsorted(1:2, 1.5 .. 1.5) === 2:1 === searchsorted(1:2, 1.5 .. 1.5) +@test Extrapolated.searchsorted(1:2, 0.5 .. 0.5) === 1:0 === searchsorted(1:2, 0.5 .. 0.5) +@test Extrapolated.searchsorted(1:2, -0.5 .. -0.5) === 0:-1 +@test Extrapolated.searchsorted(1:2, -1.5 .. -1.5) === -1:-2 -@test unsafe_searchsorted(2:2:4, 0x6 .. 0x6) === 3:3 -@test unsafe_searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2 -@test unsafe_searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2 -@test unsafe_searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1 -@test unsafe_searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1 -@test unsafe_searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0 -@test unsafe_searchsorted(2:2:4, 0x0 .. 0x0) === 0:0 +@test Extrapolated.searchsorted(2:2:4, 0x6 .. 0x6) === 3:3 +@test Extrapolated.searchsorted(2:2:4, 0x5 .. 0x5) === searchsorted(2:2:4, 0x5 .. 0x5) === 3:2 +@test Extrapolated.searchsorted(2:2:4, 0x4 .. 0x4) === searchsorted(2:2:4, 0x4 .. 0x4) === 2:2 +@test Extrapolated.searchsorted(2:2:4, 0x3 .. 0x3) === searchsorted(2:2:4, 0x3 .. 0x3) === 2:1 +@test Extrapolated.searchsorted(2:2:4, 0x2 .. 0x2) === searchsorted(2:2:4, 0x2 .. 0x2) === 1:1 +@test Extrapolated.searchsorted(2:2:4, 0x1 .. 0x1) === searchsorted(2:2:4, 0x1 .. 0x1) === 1:0 +@test Extrapolated.searchsorted(2:2:4, 0x0 .. 0x0) === 0:0