This repository was archived by the owner on Aug 17, 2021. It is now read-only.
forked from JuliaArrays/AxisArrays.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcategoricalvector.jl
More file actions
85 lines (61 loc) · 2.55 KB
/
categoricalvector.jl
File metadata and controls
85 lines (61 loc) · 2.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
export CategoricalVector

"""
    CategoricalVector(x::AbstractVector)

A CategoricalVector is an AbstractVector which is treated as a categorical axis regardless
of the element type. Duplicate values are not allowed but are not filtered out.

A CategoricalVector axis can be indexed with a ClosedInterval, with a value, or with a
vector of values. Use of a CategoricalVector{Tuple} axis allows indexing similar to the
hierarchical index of the Python Pandas package or the R data.table package.

In general, indexing into a CategoricalVector will be much slower than the corresponding
SortedVector or another sorted axis type, as linear search is required.

### Constructors

```julia
CategoricalVector(x::AbstractVector)
```

### Arguments

* `x::AbstractVector` : the wrapped vector

### Examples

```julia
v = CategoricalVector(collect([1; 8; 10:15]))
A = AxisArray(reshape(1:16, 8, 2), v, [:a, :b])
A[Axis{:row}(1), :]
A[Axis{:row}(10), :]
A[Axis{:row}([1, 10]), :]

## Hierarchical index example with three key levels

data = reshape(1.:40., 20, 2)
v = collect(zip([:a, :b, :c][rand(1:3,20)], [:x,:y][rand(1:2,20)], [:x,:y][rand(1:2,20)]))
A = AxisArray(data, CategoricalVector(v), [:a, :b])
A[:b, :]
A[[:a,:c], :]
A[(:a,:x), :]
A[(:a,:x,:x), :]
```
"""
struct CategoricalVector{T, A<:AbstractVector{T}} <: AbstractVector{T}
    # The wrapped vector; its concrete type is carried in the parameter `A` and its
    # element type in `T`.
    data::A
end
# Convenience outer constructor: the element type `T` and the concrete container type are
# both taken from `data`, so callers never have to spell out the type parameters.
CategoricalVector(data::AbstractVector{T}) where {T} =
    CategoricalVector{T, typeof(data)}(data)
# Scalar lookup: forward an integer position straight to the wrapped vector.
function Base.getindex(v::CategoricalVector, idx::Int)
    return v.data[idx]
end

# Vector lookup: index the wrapped vector with `idx` and re-wrap the selection so the
# result is again a `CategoricalVector`.
function Base.getindex(v::CategoricalVector, idx::AbstractVector)
    selected = v.data[idx]
    return CategoricalVector(selected)
end
# Shape queries are all answered by the wrapped vector.
function Base.length(v::CategoricalVector)
    return length(v.data)
end

function Base.size(v::CategoricalVector)
    return size(v.data)
end

function Base.size(v::CategoricalVector, i)
    return size(v.data, i)
end

# NOTE(review): `indices` was renamed to `axes` in Julia 0.7 — confirm the supported Julia
# version before porting this method.
function Base.indices(v::CategoricalVector)
    return indices(v.data)
end
# Every fully parameterized `CategoricalVector` type reports the `Categorical` axis trait.
function axistrait(::Type{CategoricalVector{T,A}}) where {T,A}
    return Categorical
end
# No validation is performed on the contents of a `CategoricalVector`; this method always
# returns `nothing`.
function checkaxis(::CategoricalVector)
    return nothing
end
## Add some special indexing for CategoricalVector{Tuple}'s to achieve something like
## pandas' hierarchical indexing

# Fallback for an index with no more specific method: wrap it in a 1-tuple and
# re-dispatch, so a bare key such as `:a` is looked up as the key prefix `(:a,)`.
function axisindexes(ax::Axis{S,CategoricalVector{T,A}}, idx) where {T<:Tuple,S,A}
    return axisindexes(ax, (idx,))
end
# Return the positions of every axis element for which `_tuple_matches(element, idx)`
# holds. Every position of the axis values is tested in turn.
function axisindexes(ax::Axis{S,CategoricalVector{T,A}}, idx::Tuple) where {T<:Tuple,S,A}
    labels = ax.val
    # `indices(labels)` is a tuple of index ranges; splatting it passes the range itself
    # to `filter`.
    return collect(filter(i -> _tuple_matches(labels[i], idx), indices(labels)...))
end
# Return `true` when `idx` is a prefix of `element`: `idx` must not be longer than
# `element`, and each entry of `idx` must `==` the entry of `element` at the same
# position. An empty `idx` therefore matches every `element`.
function _tuple_matches(element::Tuple, idx::Tuple)
    nkeys = length(idx)
    nkeys > length(element) && return false
    # Tuples are always 1-based, so a plain counting range is safe here.
    for k in 1:nkeys
        element[k] == idx[k] || return false
    end
    return true
end
# Look up each entry of `idx` separately and concatenate the resulting positions in the
# order the entries appear in `idx`.
function axisindexes(ax::Axis{S,CategoricalVector{T,A}}, idx::AbstractArray) where {T<:Tuple,S,A}
    # Accumulate into a typed vector instead of splatting into `vcat`: an empty `idx`
    # then yields `Int[]` rather than `Any[]`, and no arbitrarily long argument list is
    # built.
    # NOTE(review): assumes the per-entry lookups return `Int` positions — confirm for
    # axes backed by arrays with non-`Int` index ranges.
    positions = Int[]
    for key in idx
        append!(positions, axisindexes(ax, key))
    end
    return positions
end