Skip to content

Commit 54f7a45

Browse files
committed
avoid Core.Box in the package
I used JuliaLang/julia#60478 to find all the `Core.Box` instances in the package and then fix them.
1 parent aeea2c2 commit 54f7a45

File tree

8 files changed

+110
-96
lines changed

8 files changed

+110
-96
lines changed

src/abstractdataframe/abstractdataframe.jl

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -735,34 +735,36 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
735735
data = DataFrame()
736736
data.variable = propertynames(df)
737737

738+
predefined_funs_local = predefined_funs
739+
738740
# An array of Dicts for summary statistics
739741
col_stats_dicts = map(eachcol(df)) do col
740742
if eltype(col) >: Missing
741743
t = skipmissing(col)
742-
d = get_stats(t, predefined_funs)
744+
d = get_stats(t, predefined_funs_local)
743745
get_stats!(d, t, custom_funs)
744746
else
745-
d = get_stats(col, predefined_funs)
747+
d = get_stats(col, predefined_funs_local)
746748
get_stats!(d, col, custom_funs)
747749
end
748750

749-
if :nmissing in predefined_funs
751+
if :nmissing in predefined_funs_local
750752
d[:nmissing] = count(ismissing, col)
751753
end
752754

753-
if :nnonmissing in predefined_funs
755+
if :nnonmissing in predefined_funs_local
754756
d[:nnonmissing] = count(!ismissing, col)
755757
end
756758

757-
if :first in predefined_funs
759+
if :first in predefined_funs_local
758760
d[:first] = isempty(col) ? nothing : first(col)
759761
end
760762

761-
if :last in predefined_funs
763+
if :last in predefined_funs_local
762764
d[:last] = isempty(col) ? nothing : last(col)
763765
end
764766

765-
if :eltype in predefined_funs
767+
if :eltype in predefined_funs_local
766768
d[:eltype] = eltype(col)
767769
end
768770

src/abstractdataframe/iteration.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -713,11 +713,11 @@ function Base.reduce(::typeof(vcat),
713713

714714
if source !== nothing
715715
len = length(dfs)
716-
if source isa SymbolOrString
717-
col, vals = source, 1:len
716+
col, vals = if source isa SymbolOrString
717+
source, 1:len
718718
else
719719
@assert source isa Pair{<:SymbolOrString,<:AbstractVector}
720-
col, vals = source
720+
source
721721
end
722722

723723
if columnindex(res, col) > 0

src/abstractdataframe/subset.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ function subset!(gdf::GroupedDataFrame, @nospecialize(args...); skipmissing::Boo
515515
end
516516

517517
# update GroupedDataFrame indices in a thread safe way
518-
Threads.lock(lazy_lock) do
518+
Base.@lock lazy_lock begin
519519
gdf.groups = newgroups
520520
gdf.idx = nothing
521521
gdf.starts = nothing

src/dataframe/dataframe.jl

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -216,15 +216,17 @@ mutable struct DataFrame <: AbstractDataFrame
216216
end
217217
len == -1 && (len = 1) # we got no vectors so make one row of scalars
218218

219+
len_val = len
220+
219221
# we write into columns as we know that it is guaranteed
220222
# that it was freshly allocated in the outer constructor
221-
if copycols && len >= 100_000 && length(columns) > 1 && Threads.nthreads() > 1
223+
if copycols && len_val >= 100_000 && length(columns) > 1 && Threads.nthreads() > 1
222224
@sync for i in eachindex(columns)
223-
@spawn columns[i] = _preprocess_column(columns[i], len, copycols)
225+
@spawn columns[i] = _preprocess_column(columns[i], len_val, copycols)
224226
end
225227
else
226228
for i in eachindex(columns)
227-
columns[i] = _preprocess_column(columns[i], len, copycols)
229+
columns[i] = _preprocess_column(columns[i], len_val, copycols)
228230
end
229231
end
230232

@@ -901,8 +903,9 @@ function _deleteat!_helper(df::DataFrame, drop)
901903
return df
902904
end
903905

904-
if any(c -> c === drop || Base.mightalias(c, drop), cols)
905-
drop = copy(drop)
906+
drop_local = drop
907+
if any(c -> c === drop_local || Base.mightalias(c, drop_local), cols)
908+
drop = copy(drop_local)
906909
end
907910

908911
n = nrow(df)

src/groupeddataframe/complextransforms.jl

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,27 +32,29 @@ function _combine_with_first((first,)::Ref{Any},
3232
@assert incols isa Union{Nothing, AbstractVector, Tuple, NamedTuple}
3333
extrude = false
3434

35+
firstrow = first
36+
wrapped_first = nothing
3537
lgd = length(gd)
36-
if first isa AbstractDataFrame
38+
if firstrow isa AbstractDataFrame
3739
n = 0
38-
eltys = eltype.(eachcol(first))
39-
elseif first isa NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}
40+
eltys = eltype.(eachcol(firstrow))
41+
elseif firstrow isa NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}
4042
n = 0
41-
eltys = map(eltype, first)
42-
elseif first isa DataFrameRow
43+
eltys = map(eltype, firstrow)
44+
elseif firstrow isa DataFrameRow
4345
n = lgd
44-
eltys = [eltype(parent(first)[!, i]) for i in parentcols(index(first))]
45-
elseif first isa Tables.AbstractRow
46+
eltys = [eltype(parent(firstrow)[!, i]) for i in parentcols(index(firstrow))]
47+
elseif firstrow isa Tables.AbstractRow
4648
n = lgd
47-
eltys = [typeof(Tables.getcolumn(first, name)) for name in Tables.columnnames(first)]
48-
elseif !firstmulticol && first[1] isa Union{AbstractArray{<:Any, 0}, Ref}
49+
eltys = [typeof(Tables.getcolumn(firstrow, name)) for name in Tables.columnnames(firstrow)]
50+
elseif !firstmulticol && firstrow[1] isa Union{AbstractArray{<:Any, 0}, Ref}
4951
extrude = true
50-
first = wrap_row(first[1], firstcoltype(firstmulticol))
52+
wrapped_first = wrap_row(firstrow[1], firstcoltype(firstmulticol))
5153
n = lgd
52-
eltys = (typeof(first[1]),)
54+
eltys = (typeof(wrapped_first[1]),)
5355
else # other NamedTuple giving a single row
5456
n = lgd
55-
eltys = map(typeof, first)
57+
eltys = map(typeof, firstrow)
5658
if any(x -> x <: AbstractVector, eltys)
5759
throw(ArgumentError("mixing single values and vectors in a named tuple is not allowed"))
5860
end
@@ -65,19 +67,21 @@ function _combine_with_first((first,)::Ref{Any},
6567
sizehint!(idx, lgd)
6668

6769
local initialcols
70+
firstrow_local = extrude ? wrapped_first : firstrow
71+
6872
let eltys=eltys, n=n # Workaround for julia#15276
69-
initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first))
73+
initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(firstrow_local))
7074
end
71-
targetcolnames = first isa Tables.AbstractRow ?
72-
tuple(Tables.columnnames(first)...) :
73-
tuple(propertynames(first)...)
74-
if !extrude && first isa Union{AbstractDataFrame,
75+
targetcolnames = firstrow_local isa Tables.AbstractRow ?
76+
tuple(Tables.columnnames(firstrow_local)...) :
77+
tuple(propertynames(firstrow_local)...)
78+
if !extrude && firstrow_local isa Union{AbstractDataFrame,
7579
NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}
76-
outcols, finalcolnames = _combine_tables_with_first!(first, initialcols, idx, 1, 1,
80+
outcols, finalcolnames = _combine_tables_with_first!(firstrow_local, initialcols, idx, 1, 1,
7781
f, gd, incols, targetcolnames,
7882
firstcoltype(firstmulticol))
7983
else
80-
outcols, finalcolnames = _combine_rows_with_first!(Ref{Any}(first),
84+
outcols, finalcolnames = _combine_rows_with_first!(Ref{Any}(firstrow_local),
8185
Ref{Any}(initialcols),
8286
Ref{Any}(f),
8387
gd,
@@ -144,8 +148,7 @@ function _combine_rows_with_first_task!(tid::Integer,
144148
j = fill_row!(row, outcols, i, 1, colnames)
145149
if j !== nothing # Need to widen column
146150
# If another thread is already widening outcols, wait until it's done
147-
lock(widen_type_lock)
148-
try
151+
Base.@lock widen_type_lock begin
149152
newoutcols = outcolsref[]
150153
# Workaround for julia#15276
151154
newoutcols = let i=i, j=j, newoutcols=newoutcols, row=row
@@ -172,8 +175,6 @@ function _combine_rows_with_first_task!(tid::Integer,
172175

173176
outcolsref[] = newoutcols
174177
type_widened[tid] = false
175-
finally
176-
unlock(widen_type_lock)
177178
end
178179
return _combine_rows_with_first_task!(tid, rowstart, rowend, i+1, newoutcols, outcolsref,
179180
type_widened, widen_type_lock,
@@ -185,7 +186,7 @@ function _combine_rows_with_first_task!(tid::Integer,
185186
# This doesn't have to happen immediately (hence type_widened isn't atomic),
186187
# but the more we wait the more data will have to be copied
187188
if type_widened[tid]
188-
lock(widen_type_lock) do
189+
Base.@lock widen_type_lock begin
189190
type_widened[tid] = false
190191
newoutcols = outcolsref[]
191192
for k in 1:length(outcols)
@@ -276,13 +277,14 @@ function _combine_rows_with_first!((firstrow,)::Ref{Any},
276277
partitions = (2:len,)
277278
end
278279
widen_type_lock = ReentrantLock()
279-
outcolsref = Ref{NTuple{<:Any, AbstractVector}}(outcols)
280+
outcols_local = outcols
281+
outcolsref = Ref{NTuple{<:Any, AbstractVector}}(outcols_local)
280282
type_widened = fill(false, length(partitions))
281283
tasks = Vector{Task}(undef, length(partitions))
282284
for (tid, idx) in enumerate(partitions)
283285
tasks[tid] =
284286
@spawn_or_run_task threads _combine_rows_with_first_task!(tid, first(idx), last(idx), first(idx),
285-
outcols, outcolsref,
287+
outcols_local, outcolsref,
286288
type_widened, widen_type_lock,
287289
f, gd, starts, ends, incols, colnames,
288290
firstcoltype(firstmulticol))
@@ -378,10 +380,10 @@ function _combine_tables_with_first!(first::Union{AbstractDataFrame,
378380
_ncol(rows) == 0 && continue
379381
if isempty(colnames)
380382
newcolnames = tuple(propertynames(rows)...)
381-
if rows isa AbstractDataFrame
382-
eltys = eltype.(eachcol(rows))
383+
eltys = if rows isa AbstractDataFrame
384+
eltype.(eachcol(rows))
383385
else
384-
eltys = map(eltype, rows)
386+
map(eltype, rows)
385387
end
386388
initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], 0), _ncol(rows))
387389
return _combine_tables_with_first!(rows, initialcols, idx, i, 1,

src/groupeddataframe/splitapplycombine.jl

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ function _combine_prepare_norm(gd::GroupedDataFrame,
124124
@assert length(gd_keys) > 0 || idx == gd.idx
125125
# in this case we are sure that the result GroupedDataFrame has the
126126
# same structure as the source except that grouping columns are at the start
127-
return Threads.lock(gd.lazy_lock) do
127+
Base.@lock gd.lazy_lock begin
128128
return GroupedDataFrame(newparent, copy(gd.cols), gd.groups,
129129
getfield(gd, :idx), getfield(gd, :starts),
130130
getfield(gd, :ends), gd.ngroups,
@@ -298,19 +298,19 @@ function _combine_process_proprow((cs_i,)::Ref{Any},
298298

299299
# introduce outcol1 and outcol2 as without it outcol is boxed
300300
# since it is later used inside the anonymous function we return
301-
if getfield(gd, :idx) === nothing
301+
outcol = if getfield(gd, :idx) === nothing
302302
outcol1 = zeros(Float64, length(gd) + 1)
303303
@inbounds @simd for gix in gd.groups
304304
outcol1[gix + 1] += 1
305305
end
306306
popfirst!(outcol1)
307307
outcol1 ./= sum(outcol1)
308-
outcol = outcol1
308+
outcol1
309309
else
310310
outcol2 = Vector{Float64}(undef, length(gd))
311311
outcol2 .= gd.ends .- gd.starts .+ 1
312312
outcol2 ./= sum(outcol2)
313-
outcol = outcol2
313+
outcol2
314314
end
315315

316316
return function()
@@ -384,7 +384,7 @@ function _combine_process_callable(wcs_i::Ref{Any},
384384

385385
if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame,
386386
NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
387-
lock(gd.lazy_lock) do
387+
Base.@lock gd.lazy_lock begin
388388
# if idx_agg was not computed yet it is NOTHING_IDX_AGG
389389
# in this case if we are not passed a vector compute it.
390390
if idx_agg[] === NOTHING_IDX_AGG
@@ -396,6 +396,7 @@ function _combine_process_callable(wcs_i::Ref{Any},
396396
end
397397
end
398398
@assert length(outcols) == length(nms)
399+
idx_local = idx
399400
return function()
400401
for j in eachindex(outcols)
401402
outcol = outcols[j]
@@ -408,11 +409,11 @@ function _combine_process_callable(wcs_i::Ref{Any},
408409
# we have seen this col but it is not allowed to replace it
409410
optional || throw(ArgumentError("duplicate output column name: :$out_col_name"))
410411
@assert trans_res[loc].optional && trans_res[loc].name == out_col_name
411-
trans_res[loc] = TransformationResult(idx, outcol, out_col_name, optional_i, 0)
412+
trans_res[loc] = TransformationResult(idx_local, outcol, out_col_name, optional_i, 0)
412413
seen_cols[out_col_name] = (optional_i, loc)
413414
end
414415
else
415-
push!(trans_res, TransformationResult(idx, outcol, out_col_name, optional_i, 0))
416+
push!(trans_res, TransformationResult(idx_local, outcol, out_col_name, optional_i, 0))
416417
seen_cols[out_col_name] = (optional_i, length(trans_res))
417418
end
418419
end
@@ -443,7 +444,7 @@ function _combine_process_pair_symbol(optional_i::Bool,
443444
end
444445
# if idx_agg was not computed yet it is NOTHING_IDX_AGG
445446
# in this case if we are not passed a vector compute it.
446-
lock(gd.lazy_lock) do
447+
Base.@lock gd.lazy_lock begin
447448
if !(firstres isa AbstractVector) && idx_agg[] === NOTHING_IDX_AGG
448449
idx_agg[] = Vector{Int}(undef, length(gd))
449450
fillfirst!(nothing, idx_agg[], 1:length(gd.groups), gd)
@@ -463,13 +464,13 @@ function _combine_process_pair_symbol(optional_i::Bool,
463464
@assert length(outcols) == 1
464465
outcol = outcols[1]
465466

466-
if (source_cols isa Int ||
467+
metacol = if (source_cols isa Int ||
467468
(source_cols isa AbstractVector{Int} && length(source_cols) == 1)) &&
468469
(only(source_cols) == columnindex(parent(gd), out_col_name) ||
469470
only(wfun) === identity || only(wfun) === copy)
470-
metacol = only(source_cols)
471+
only(source_cols)
471472
else
472-
metacol = 0
473+
0
473474
end
474475

475476
return function()
@@ -546,7 +547,7 @@ function _combine_process_pair_astable(optional_i::Bool,
546547
wincols, threads)
547548
if !(firstres isa Union{AbstractVecOrMat, AbstractDataFrame,
548549
NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
549-
lock(gd.lazy_lock) do
550+
Base.@lock gd.lazy_lock begin
550551
# if idx_agg was not computed yet it is nothing
551552
# in this case if we are not passed a vector compute it.
552553
if idx_agg[] === NOTHING_IDX_AGG
@@ -568,24 +569,27 @@ function _combine_process_pair_astable(optional_i::Bool,
568569
nms = out_col_name
569570
end
570571
end
572+
outcols_local = outcols
573+
nms_local = nms
574+
idx_local = idx
571575
return function()
572-
for j in eachindex(outcols)
573-
outcol = outcols[j]
574-
out_col_name = nms[j]
575-
if haskey(seen_cols, out_col_name)
576-
optional, loc = seen_cols[out_col_name]
576+
for j in eachindex(outcols_local)
577+
outcol = outcols_local[j]
578+
out_col_name_j = nms_local[j]
579+
if haskey(seen_cols, out_col_name_j)
580+
optional, loc = seen_cols[out_col_name_j]
577581
# if column was seen and it is optional now ignore it
578582
if !optional_i
579-
optional, loc = seen_cols[out_col_name]
583+
optional, loc = seen_cols[out_col_name_j]
580584
# we have seen this col but it is not allowed to replace it
581-
optional || throw(ArgumentError("duplicate output column name: :$out_col_name"))
582-
@assert trans_res[loc].optional && trans_res[loc].name == out_col_name
583-
trans_res[loc] = TransformationResult(idx, outcol, out_col_name, optional_i, 0)
584-
seen_cols[out_col_name] = (optional_i, loc)
585+
optional || throw(ArgumentError("duplicate output column name: :$out_col_name_j"))
586+
@assert trans_res[loc].optional && trans_res[loc].name == out_col_name_j
587+
trans_res[loc] = TransformationResult(idx_local, outcol, out_col_name_j, optional_i, 0)
588+
seen_cols[out_col_name_j] = (optional_i, loc)
585589
end
586590
else
587-
push!(trans_res, TransformationResult(idx, outcol, out_col_name, optional_i, 0))
588-
seen_cols[out_col_name] = (optional_i, length(trans_res))
591+
push!(trans_res, TransformationResult(idx_local, outcol, out_col_name_j, optional_i, 0))
592+
seen_cols[out_col_name_j] = (optional_i, length(trans_res))
589593
end
590594
end
591595
end
@@ -675,15 +679,15 @@ function _combine(gd::GroupedDataFrame,
675679
return Int[], DataFrame(), Int[]
676680
end
677681

678-
if keeprows
682+
idx_keeprows = if keeprows
679683
if nrow(parent(gd)) > 0 && minimum(gd.groups) == 0
680684
throw(ArgumentError("select and transform do not support " *
681685
"`GroupedDataFrame`s from which some groups have "*
682686
"been dropped (including skipmissing=true)"))
683687
end
684-
idx_keeprows = prepare_idx_keeprows(gd.idx, gd.starts, gd.ends, nrow(parent(gd)))
688+
prepare_idx_keeprows(gd.idx, gd.starts, gd.ends, nrow(parent(gd)))
685689
else
686-
idx_keeprows = Int[]
690+
Int[]
687691
end
688692

689693
idx_agg = Ref(NOTHING_IDX_AGG)

0 commit comments

Comments
 (0)