|
| 1 | + |
| 2 | +export CategoricalVector |
| 3 | + |
"""
A CategoricalVector is an AbstractVector which is treated as a categorical axis regardless
of the element type. Duplicate values are not allowed, but they are neither checked for
nor filtered out.

A CategoricalVector axis can be indexed with a ClosedInterval, with a value, or with a
vector of values. Use of a CategoricalVector{Tuple} axis allows indexing similar to the
hierarchical index of the Python Pandas package or the R data.table package.

In general, indexing into a CategoricalVector will be much slower than indexing into the
corresponding SortedVector or another sorted axis type, as a linear search is required.

### Constructors

```julia
CategoricalVector(x::AbstractVector)
```

### Arguments

* `x::AbstractVector` : the wrapped vector

### Examples

```julia
v = CategoricalVector(collect([1; 8; 10:15]))
A = AxisArray(reshape(1:16, 8, 2), v, [:a, :b])
A[Axis{:row}(1), :]
A[Axis{:row}(10), :]
A[Axis{:row}([1, 10]), :]

## Hierarchical index example with three key levels

data = reshape(1.:40., 20, 2)
v = collect(zip([:a, :b, :c][rand(1:3,20)], [:x,:y][rand(1:2,20)], [:x,:y][rand(1:2,20)]))
A = AxisArray(data, CategoricalVector(v), [:a, :b])
A[:b, :]
A[[:a,:c], :]
A[(:a,:x), :]
A[(:a,:x,:x), :]
```
"""
immutable CategoricalVector{T} <: AbstractVector{T}
    # The wrapped vector holding the axis values.
    # NOTE(review): the field is declared with an abstract type, so the concrete storage
    # type is not part of `CategoricalVector{T}`; this likely costs performance, but
    # changing it would alter the type parameters that other methods dispatch on.
    data::AbstractVector{T}
end
| 48 | + |
# Scalar indexing: forward to the wrapped vector and return the stored element.
function Base.getindex(v::CategoricalVector, idx::Int)
    return v.data[idx]
end

# Vector indexing: select from the wrapped vector and re-wrap the selection so the
# result is again a CategoricalVector.
function Base.getindex(v::CategoricalVector, idx::AbstractVector)
    selected = v.data[idx]
    return CategoricalVector(selected)
end
| 51 | + |
# Shape queries are all delegated to the wrapped vector.

# Number of elements in the wrapped vector.
function Base.length(v::CategoricalVector)
    return length(v.data)
end

# Size tuple of the wrapped vector.
function Base.size(v::CategoricalVector)
    return size(v.data)
end

# Size of the wrapped vector along dimension `i`.
function Base.size(v::CategoricalVector, i)
    return size(v.data, i)
end

# Index ranges of the wrapped vector.
function Base.indices(v::CategoricalVector)
    return indices(v.data)
end
| 56 | + |
# Every CategoricalVector type reports the `Categorical` trait, whatever its element type.
function axistrait{T}(::Type{CategoricalVector{T}})
    return Categorical
end

# No validation is performed on a CategoricalVector axis; this always returns `nothing`.
function checkaxis(::CategoricalVector)
    return nothing
end
| 59 | + |
| 60 | + |
## Special indexing for CategoricalVector{Tuple} axes, to achieve something like the
## hierarchical indexing of Pandas.

# Fallback for any key without a more specific method: wrap it in a 1-tuple and
# defer to the tuple method, which matches it against the leading tuple component.
function axisindexes{T<:Tuple,S}(ax::Axis{S,CategoricalVector{T}}, idx)
    key = (idx,)
    return axisindexes(ax, key)
end
| 65 | + |
# Return the positions of all axis elements whose leading components equal the
# components of `idx` (see `_tuple_matches`). Every position is tested in turn.
function axisindexes{T<:Tuple,S}(ax::Axis{S,CategoricalVector{T}}, idx::Tuple)
    labels = ax.val
    ismatch(i) = _tuple_matches(labels[i], idx)
    positions = filter(ismatch, indices(labels)...)
    return collect(positions)
end
| 69 | + |
# Return `true` when `idx` is a prefix of `element`, i.e. `idx` is no longer than
# `element` and each of its components is `==` to the component of `element` at the
# same position. An empty `idx` matches every `element`.
function _tuple_matches(element::Tuple, idx::Tuple)
    n = length(idx)
    if n > length(element)
        return false
    end

    for k in 1:n
        if element[k] != idx[k]
            return false
        end
    end

    return true
end
| 79 | + |
# Index with a collection of keys: look up each key in turn and concatenate the
# matching positions in the order the keys are given.
#
# The positions are accumulated into an `Int[]` rather than built with
# `vcat([...]...)`. With an empty `idx` the splatted form calls `vcat()` with no
# arguments and yields an untyped `Any[]`; it also splats a collection whose length
# is only known at run time. This version always returns a `Vector{Int}`.
function axisindexes{T<:Tuple,S}(ax::Axis{S,CategoricalVector{T}}, idx::AbstractArray)
    positions = Int[]
    for key in idx
        append!(positions, axisindexes(ax, key))
    end
    return positions
end
0 commit comments