Skip to content

Commit 5c93bf2

Browse files
LilithHafner, nsajko, and adienes
authored
Union-split on Expr, Symbol, and LineNumberNode when hashing Exprs (#59378)
```julia-repl
x@fedora:~/.julia/dev/julia$ julia +pr59378
  o  | Version 1.13.0-DEV.1043 (2025-09-07)
 o o | lh/hash-expr-union-split/8a95cf82d3d (fork: 8 commits, 14 days)

julia> expr = Meta.parseall(read("base/show.jl", String));

julia> using ChairmarksExtras

julia> @btime expr hash
  395.753 μs (23358 allocs: 364.969 KiB)
0x8e1ffc47fe5dc80b

julia> @btime :(sin(x^2) + cos(x^2)) hash
  144.778 ns (15 allocs: 240 bytes)
0xc837adb769107933

julia>
x@fedora:~/.julia/dev/julia$ julia +nightly
  o  | Version 1.13.0-DEV.1096 (2025-09-07)
 o o | Commit 8a384ab (0 days old master)

julia> expr = Meta.parseall(read("base/show.jl", String));

julia> using ChairmarksExtras

julia> @btime expr hash
  826.924 μs (22422 allocs: 350.344 KiB)
0xf4f9c5fc15a95298

julia> @btime :(sin(x^2) + cos(x^2)) hash
  275.557 ns (14 allocs: 224 bytes)
0xc837adb769107933
```

Notably, the hash of that big expression changes between these versions because it contains global refs that have different `objectid`s on these two versions.

---------

Co-authored-by: Neven Sajko <4944410+nsajko@users.noreply.github.com>
Co-authored-by: Andy Dienes <51664769+adienes@users.noreply.github.com>
1 parent 865b8be commit 5c93bf2

File tree

2 files changed

+38
-7
lines changed

2 files changed

+38
-7
lines changed

base/hashing.jl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,17 +236,21 @@ end
236236
## symbol & expression hashing ##
237237
# Seeded `hash` methods for `QuoteNode`, `PhiNode`, and `PhiCNode`. Each method XORs a
# fixed constant (selected by the native word size) into the incoming seed `h` and then
# hashes the node's fields with the result.
# `hash(::Expr, ::UInt)` is no longer defined in this conditional; it is a separate
# function defined after it.
if UInt === UInt64
    # conservatively hash using == equality of all of the data, even though == often uses === internally
    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x2c97bf8b3de87020)
    hash(x::PhiNode, h::UInt) = hash(x.edges, hash(x.values, h ⊻ 0x2c97bf8b3de87020))
    hash(x::PhiCNode, h::UInt) = hash(x.values, h ⊻ 0x2c97bf8b3de87020)
else
    hash(x::QuoteNode, h::UInt) = hash(x.value, h ⊻ 0x469d72af)
    hash(x::PhiNode, h::UInt) = hash(x.edges, hash(x.values, h ⊻ 0x469d72af))
    hash(x::PhiCNode, h::UInt) = hash(x.values, h ⊻ 0x469d72af)
end
249247

248+
# Hash an `Expr` with seed `h`: first fold in `x.head` (after XORing a word-size-dependent
# constant into the seed), then hash the argument vector via `hash_shaped`, passing a
# tuple of `Val` types as the element-type hint.
function hash(x::Expr, h::UInt)
    h = hash(x.head, h ⊻ (UInt === UInt64 ? 0x83c7900696d26dc6 : 0x469d72af))
    # Hint that `x.args::Vector{Any}` is mostly Expr, Symbol, and LineNumberNode.
    eltype_hint = (Val{Expr}(), Val{Symbol}(), Val{LineNumberNode}())
    return hash_shaped(x.args, h ⊻ hash_abstractarray_seed, eltype_hint)
end
253+
250254
function hash(x::CodeInfo, h::UInt)
251255
h ⊻= UInt === UInt64 ? 0x2c97bf8b3de87020 : 0x469d72af
252256
for i in 1:nfields(x)

base/multidimensional.jl

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2043,7 +2043,34 @@ function _hash_fib(A, h::UInt)
20432043
return hash_uint(h)
20442044
end
20452045

2046-
function hash_shaped(A, h::UInt)
2046+
"""
    union_split(f, x, ts::Tuple{Vararg{Val}}, args...)

Call `f(x, args...)`, union-splitting on all the types specified by `ts`.

`union_split(f, x, (Val{T1}(), Val{T2}()), y, z)` is equivalent to

```julia
if x isa T1
    f(x, y, z)
elseif x isa T2
    f(x, y, z)
else
    f(x, y, z)
end
```

Every branch performs the same call, so the returned value does not depend on `ts`;
an empty `ts` simply calls `f(x, args...)`.
"""
@inline function union_split(f, @nospecialize(x), ts::Tuple{Val{T}, Vararg{Val,N}}, args...) where {T, N}
    if x isa T
        # `x` matched the first candidate type: make the call in this branch.
        return f(x, args...)
    else
        # Otherwise drop the first candidate and test the remaining ones.
        return union_split(f, x, Base.tail(ts), args...)
    end
end
# Base case: no candidate types left, so call `f` directly.
@inline union_split(f, x, ::Tuple{}, args::Vararg{Any, N}) where {N} = f(x, args...)
2071+
2072+
function hash_shaped(A, h0::UInt, eltype_hint=())
2073+
h::UInt = h0
20472074
# Axes are themselves AbstractArrays, so hashing them directly would stack overflow
20482075
# Instead hash the tuple of firsts and lasts along each dimension
20492076
h = hash(map(first, axes(A)), h)
@@ -2053,20 +2080,20 @@ function hash_shaped(A, h::UInt)
20532080
if len < 8
20542081
# for the shortest arrays we chain directly
20552082
for elt in A
2056-
h = hash(elt, h)
2083+
h = union_split(hash, elt, eltype_hint, h)
20572084
end
20582085
return h
20592086
elseif len < 32768
20602087
# separate accumulator streams, unrolled
2061-
@nexprs 8 i -> p_i = h
2088+
@nexprs 8 i -> p_i::UInt = h
20622089
n = 1
20632090
limit = len - 7
20642091
while n <= limit
2065-
@nexprs 8 i -> p_i = hash(A[n + i - 1], p_i)
2092+
@nexprs 8 i -> p_i = union_split(hash, A[n + i - 1], eltype_hint, p_i)
20662093
n += 8
20672094
end
20682095
while n <= len
2069-
p_1 = hash(A[n], p_1)
2096+
p_1 = union_split(hash, A[n], eltype_hint, p_1)
20702097
n += 1
20712098
end
20722099
# fold all streams back together

0 commit comments

Comments
 (0)