Skip to content

Commit d488c79

Browse files
authored
update hashing to Julia 1.13 and use column names in data frame hashing (#3507)
1 parent 3f2e837 commit d488c79

File tree

4 files changed

+14
-5
lines changed

4 files changed

+14
-5
lines changed

src/abstractdataframe/abstractdataframe.jl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1902,8 +1902,9 @@ end
19021902
const hashdf_seed = UInt == UInt32 ? 0xfd8bb02e : 0x6215bada8c8c46de
19031903

19041904
function Base.hash(df::AbstractDataFrame, h::UInt)
1905-
h += hashdf_seed
1906-
h += hash(size(df))
1905+
h ⊻= hashdf_seed
1906+
h = hash(size(df), h)
1907+
h = hash(_names(df), h)
19071908
for i in 1:size(df, 2)
19081909
h = hash(df[!, i], h)
19091910
end

src/groupeddataframe/utils.jl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,8 @@ end
5858
# Calculate the vector of `df` rows hash values.
5959
function hashrows(cols::Tuple{Vararg{AbstractVector}}, skipmissing::Bool)
6060
len = length(cols[1])
61-
rhashes = zeros(UInt, len)
61+
ref_val = @static Base.VERSION >= v"1.13.0-DEV" ? Base.HASH_SEED : UInt(0)
62+
rhashes = fill(ref_val, len)
6263
missings = fill(false, skipmissing ? len : 0)
6364
for (i, col) in enumerate(cols)
6465
rp = DataAPI.refpool(col)

src/join/core.jl

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,9 @@ Base.hash(ocr1::OnColRow, h::UInt) = throw(MethodError(hash, (ocr1, h)))
4040
function _prehash(oc::OnCol)
4141
h = oc.h
4242
resize!(h, oc.len)
43-
fill!(h, Base.tuplehash_seed)
43+
h0 = @static Base.VERSION >= v"1.13.0-DEV" ? Base.HASH_SEED : UInt(0)
44+
h0 ⊻= Base.tuplehash_seed
45+
fill!(h, h0)
4446
for col in reverse(oc.cols)
4547
h .= hash.(col, h)
4648
end

test/dataframe.jl

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,12 @@ end
425425

426426
@test hash(DataFrame([1 2; 3 4], :auto)) == hash(DataFrame([1 2; 3 4], :auto))
427427
@test hash(DataFrame([1 2; 3 4], :auto)) != hash(DataFrame([1 3; 2 4], :auto))
428-
@test hash(DataFrame([1 2; 3 4], :auto)) == hash(DataFrame([1 2; 3 4], :auto), zero(UInt))
428+
@test hash(DataFrame([1 2; 3 4], :auto)) != hash(DataFrame([1 2; 3 4], [:x2, :x1]))
429+
430+
@test hash(DataFrame([1 2; 3 4], :auto), UInt(10)) == hash(DataFrame([1 2; 3 4], :auto), UInt(10))
431+
@test hash(DataFrame([1 2; 3 4], :auto), UInt(10)) != hash(DataFrame([1 2; 3 4], :auto), UInt(11))
432+
@test hash(DataFrame([1 2; 3 4], :auto), UInt(10)) != hash(DataFrame([1 3; 2 4], :auto), UInt(10))
433+
@test hash(DataFrame([1 2; 3 4], :auto), UInt(10)) != hash(DataFrame([1 2; 3 4], [:x2, :x1]), UInt(10))
429434
end
430435

431436
@testset "deleteat!" begin

0 commit comments

Comments
 (0)