Skip to content

Commit 0a7639c

Browse files
authored
Add quantile/value space functions (#94)
* add first code version * reorganize into subfolder * remove unecessary return of `weights` by definition all weights are the same up to +/- 1 element. * use reusable function for averages from indices * add value space * add tests for value space * add docs and version * fix typo * remove ambivuous setindex method...?
1 parent 35b5e8f commit 0a7639c

7 files changed

Lines changed: 176 additions & 5 deletions

File tree

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "ClimateBase"
22
uuid = "35604d93-0fb8-4872-9436-495b01d137e2"
33
authors = ["Datseris <datseris.george@gmail.com>", "Philippe Roy <borghor@yahoo.ca>"]
4-
version = "0.16.0"
4+
version = "0.16.1"
55

66
[deps]
77
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

docs/src/advanced.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ interpolate_height2pressure
1818
interpolate_pressure2height
1919
```
2020

21+
## Quantile/value space
22+
```@docs
23+
quantile_space
24+
value_space
25+
```
26+
2127
## Climate quantities
2228
Functions that calculate climate-related quantities.
2329
```@docs

src/ClimateBase.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ export NCDataset
44
include("core/coredefs.jl")
55
include("core/prettyprint.jl")
66
include("core/aggregation.jl")
7-
include("core/interpolation.jl")
87

98
include("physical_dimensions/spatial.jl")
109
include("physical_dimensions/spatial_equalarea.jl")
@@ -13,6 +12,9 @@ include("physical_dimensions/temporal.jl")
1312
include("io/vector2range.jl")
1413
include("io/netcdf.jl")
1514

15+
include("interpolations/height_interpolation.jl")
16+
include("interpolations/quantile_space.jl")
17+
1618
# All following will be moved to ClimateTools.jl once its updated
1719
include("climate/solar.jl")
1820
include("tsa/continuation.jl")

src/core/coredefs.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ using DimensionalData.Dimensions: setdims
88
using Dates
99

1010
Time = DimensionalData.Ti
11+
Tim = DimensionalData.Ti
1112

1213
AbDimArray = DimensionalData.AbstractDimArray
1314

@@ -30,7 +31,7 @@ gnv(x::Dimension) = parent(parent(x))
3031

3132
export At, (..), Between, Near # Selectors from DimensionalArrays.jl
3233
export hasdim, dims, dimindex
33-
export Time, Lon, Lat, dims, Coord, Hei, Pre, Ti
34+
export Time, Lon, Lat, dims, Coord, Hei, Pre, Ti, Tim
3435
export CoordinateSpace, OrthogonalSpace, spacestructure
3536
export DimensionalData # for accessing its functions
3637
export setdims
@@ -174,8 +175,6 @@ ClimArray(A::AbstractArray, dims::Tuple, name; refdims=(), attrib=nothing) =
174175
ClimArray(A, format(dims, A), refdims, Symbol(name), attrib)
175176

176177
Base.parent(A::ClimArray) = A.data
177-
Base.@propagate_inbounds Base.setindex!(A::ClimArray, x, I::Vararg{DimensionalData.StandardIndices}) =
178-
setindex!(A.data, x, I...)
179178

180179
DimensionalData.metadata(A::ClimArray) = A.attrib
181180
DimensionalData.basetypeof(::ClimArray) = ClimArray
File renamed without changes.
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import StatsBase
2+
export quantile_space, value_space
3+
4+
###########################################################################################
5+
# Quantile space
6+
###########################################################################################
7+
"""
8+
quantile_space(A, B; n = 50) → Aq, Bq, q
9+
Express the array `B` into the quantile space of `A`. `B, A` must have the same indices.
10+
11+
The array `B` is binned according to the quantile values of the elements of `A`.
12+
`n` bins are created in total and the bin edges `p` are returned.
13+
The `i`-th bin contains data whose `A`-quantile values are ∈ [`q[i]`, `q[i+1]`).
14+
The indices of these `A` values are used to group `B`.
15+
16+
In each bin, the binned values of `A, B` are averaged, resulting in `Aq, Bq`.
17+
18+
The amount of datapoints per quantile is by definition `length(A)/n`.
19+
"""
20+
function quantile_space(A, B; n = 50)
21+
@assert size(A) == size(B)
22+
bin_idxs, qs = quantile_decomposition(A, n)
23+
Aquant, Bquant = averages_from_indices(bin_idxs, A, B)
24+
return Aquant, Bquant, qs
25+
end
26+
27+
function quantile_decomposition(A, n)
28+
Avec = vec(A)
29+
qs = range(0, 1; length = n+1)
30+
bin_width = Base.step(qs)
31+
A_cdf = StatsBase.ecdf(Avec)
32+
A_quantiles = A_cdf.(Avec)
33+
bin_idxs = [Int[] for _ in 1:length(qs)-1]
34+
for (i, q) in enumerate(A_quantiles)
35+
j = Int(q÷bin_width) + 1 # index of quantile bin
36+
push!(bin_idxs[j], i)
37+
end
38+
return bin_idxs, qs
39+
end
40+
41+
function averages_from_indices(idxs, As...)
42+
map(A -> map(i -> StatsBase.mean(view(vec(A), i)), idxs), As)
43+
end
44+
45+
# TODO:
46+
# function quantile_space(A, B, C; n = 50)
47+
# I am honestly not sure how to go about that.
48+
49+
###########################################################################################
50+
# Quantile space
51+
###########################################################################################
52+
"""
53+
value_space(A, B; Arange) → Bmeans, bin_indices
54+
Express the array `B` into the value space of `A`. `B, A` must have the same indices.
55+
This means that `A` is binned according to the given `Arange`,
56+
and the same indices that binned `A` are also used to bin `B`.
57+
The `i`-th bin contains data whose `A` values ∈ [`Arange[i]`, `Arange[i+1]`).
58+
In each bin, the binned values of `B` are averaged, resulting in `Bmeans`.
59+
60+
Elements of `A` that are not ∈ `Arange` are skipped.
61+
The returned `bin_indices` are the indices in each bin (hence, the weights
62+
for the means are just `length.(bin_indices)`)
63+
64+
By default `Arange = range(minimum(A), nextfloat(maximum(A)); length = 100)`.
65+
"""
66+
function value_space(A, B; Arange = _default_val_range(A))
67+
@assert size(A) == size(B)
68+
@assert issorted(Arange)
69+
bin_idxs = indices_in_values(A, Arange)
70+
Bmeans, = averages_from_indices(bin_idxs, B)
71+
return Bmeans, bin_idxs
72+
end
73+
74+
_default_val_range(A) = range(minimum(A), nextfloat(maximum(A)); length = 100)
75+
76+
function indices_in_values(A, Arange)
77+
bin_idxs = [Int[] for _ in 1:length(Arange)-1]
78+
# `li` are linear indices of `A`. `bi` are bin indices of the values of `A`.
79+
for (li, a) in enumerate(vec(A))
80+
a > maximum(Arange) && continue
81+
bi = searchsortedlast(Arange, a)
82+
bi == 0 && continue
83+
push!(bin_idxs[bi], li)
84+
end
85+
return bin_idxs
86+
end
87+
88+
# 2D version
89+
"""
90+
value_space(A, B, C; Arange, Brange) → Cmeans, bin_indices
91+
Express the array `C` into the joint value space of `A, B`.
92+
`A, B, C` must have the same indices.
93+
94+
A 2D histogram is created based on the given ranges, the and elements of `C` are binned
95+
according to the values of `A, B`. Then, the elements are averaged, which returns a matrix
96+
`Cmeans`, defined over the joint space S = `Arange × Brange`.
97+
`bin_indices` is also a matrix (with vector elements).
98+
`Cmeans` is `NaN` for bins without any elements.
99+
"""
100+
function value_space(A, B, C;
101+
Arange = _default_val_range(A), Brange = _default_val_range(B)
102+
)
103+
@assert size(A) == size(B) == size(C)
104+
@assert issorted(Arange)
105+
bin_idxs_1D = indices_in_values(A, Arange)
106+
bin_idxs_2D = refine_indices_in_values(bin_idxs_1D, Arange, B, Brange)
107+
Cmeans, = averages_from_indices(bin_idxs_2D, C)
108+
return Cmeans, bin_idxs_2D
109+
end
110+
111+
112+
function refine_indices_in_values(bin_idxs_1D, Arange, B, Brange)
113+
vecB = vec(B)
114+
bin_idxs = Matrix{Vector{Int}}(undef, length(Arange)-1, length(Brange)-1)
115+
for k in eachindex(bin_idxs); bin_idxs[k] = Int[]; end
116+
for (bi, idxs) in enumerate(bin_idxs_1D) # iterate over bins of A
117+
# notice that `idxs` is a container of linear indices for B!
118+
for li in idxs
119+
b = vecB[li]
120+
b > maximum(Brange) && continue
121+
bj = searchsortedlast(Brange, b)
122+
bj == 0 && continue
123+
push!(bin_idxs[bi, bj], li)
124+
end
125+
end
126+
return bin_idxs
127+
end

test/advanced_tests.jl

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,40 @@
1717
@test D_back[Hei(1)] < D_back[Hei(2)] < D_back[Hei(3)] < D_back[Hei(4)] < D_back[Hei(5)]
1818
@test D[Hei(1)] < D_pre[Pre(3)] < D[Hei(11)]
1919
end
20+
21+
@testset "quantile space" begin
22+
23+
end
24+
25+
@testset "value space" begin
26+
t = 0:0.01:5π
27+
x = cos.(t)
28+
y = sin.(t)
29+
rx = range(-1.0, nextfloat(1.0); length = 21)
30+
@testset "1D" begin
31+
using StatsBase
32+
ymeans, bin_indices = value_space(x, y; Arange = rx)
33+
weights = length.(bin_indices)
34+
@test sum(weights) == length(y)
35+
# trigonometric functions have conentrated value weight at the edges
36+
@test weights[1] > weights[length(weights)÷2]
37+
# sine must be very small when cosine is large
38+
@test ymeans[1] < weights[length(weights)÷2]
39+
@test mean(ymeans, Weights(weights)) StatsBase.mean(y)
40+
end
41+
@testset "2D" begin
42+
z = y
43+
zmeans, bin_indices = value_space(x, y, z; Arange = rx, Brange = rx)
44+
# Because cozine and size cannever be 1 at the same time, all corners
45+
# of z must be nan:
46+
L = length(rx)-1
47+
@test all(isnan, [zmeans[1,1], zmeans[1,L], zmeans[L,1], zmeans[L,L]])
48+
# and again because of where cosines and sines may have values at the same time
49+
# zmeans only is non NaN at specific locations one can derive (depends on step size)
50+
non_nan_i = 6:15
51+
@test all(!isnan, zmeans[1, non_nan_i])
52+
@test all(!isnan, zmeans[non_nan_i, end])
53+
# and because z is y, it must increase along the second dimension
54+
@test issorted(zmeans[1, non_nan_i])
55+
end
56+
end

0 commit comments

Comments
 (0)