Skip to content

Commit c562362

Browse files
Only GC when needed. Reduce allocs from mechanics. Add PrecompileTools workload. (#419)
* Reduce overhead in hot benchmark loop - Make `Benchmark` parametric (`Benchmark{F,Q}`) so `samplefunc` and `quote_vals` have concrete types, eliminating dynamic dispatch and boxing on every sample call - Skip `gcscrub()` before `gctrial`/`gcsample` when the previous sample (or warmup) reported zero allocations — nothing to collect - Pre-allocate `Trial` vectors with `sizehint!` based on the first real sample time, avoiding repeated heap growth and GC churn from the harness itself during the run - Add a test asserting the harness itself reports zero allocations for a zero-allocation benchmark * add a PrecompileTools workload * Use function barriers and reduce allocations in hot loops Revert Benchmark to non-parametric (easier to pass around) and use function barriers (_run_inner, _lineartrial_inner) so Julia specializes the sampling loops on concrete samplefunc/quote_vals types without parameterizing the struct. - Skip GC scrub when warmup/sample reported zero allocations - Temporarily set evals=1 for warmup instead of allocating new Parameters - Use explicit push!(trial, s[1], s[2], s[3], s[4]) instead of s[1:(end-1)]... to avoid intermediate tuple allocation - resize! instead of slice-copy in _lineartrial_inner Reduces steady-state allocations from ~102K to ~96 per benchmark run. * add concurrency cancel in progress * don't capture the returned result if not exposed * Use Ref{SampleResult} to avoid heap-allocating return tuples The samplefunc is stored as `Function` (abstract type), so every call returned a heap-allocated tuple through dynamic dispatch. With ~10K samples per benchmark, this was ~10K allocs per run. Now the caller passes a pre-allocated Ref{SampleResult} that the samplefunc writes into, reducing per-benchmark allocs from ~10K to ~12 (all structural: Parameters copy, Trial, vectors, etc). 
* Update .gitignore * format Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * don't specialize show methods on IO type --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 78bed8d commit c562362

8 files changed

Lines changed: 100 additions & 22 deletions

File tree

.github/workflows/CI.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
branches:
88
- main
99
tags: '*'
10+
concurrency:
11+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
12+
cancel-in-progress: true
1013
jobs:
1114
test:
1215
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ github.event_name }}

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ test/x.json
66
docs/Manifest.toml
77
docs/build
88
docs/src/assets/indigo.css
9-
Manifest.toml
9+
Manifest.toml
10+
.DS_Store

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ version = "1.7.0"
66
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
77
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
88
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
9+
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
910
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1011
Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
1112
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -16,6 +17,7 @@ Aqua = "0.8"
1617
Compat = "4.11"
1718
JSON = "0.18, 0.19, 0.20, 0.21, 1.2"
1819
Logging = "<0.0.1, 1"
20+
PrecompileTools = "1"
1921
Printf = "<0.0.1, 1"
2022
Profile = "<0.0.1, 1"
2123
Statistics = "<0.0.1, 1"

src/BenchmarkTools.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ using Printf
1010
using Profile
1111
using Compat
1212

13+
using PrecompileTools: @compile_workload, @setup_workload
14+
1315
##############
1416
# Parameters #
1517
##############
@@ -80,4 +82,12 @@ export tune!,
8082

8183
include("serialization.jl")
8284

85+
@setup_workload begin
86+
@compile_workload begin
87+
s = @benchmark 1 + 1
88+
io = IOContext(IOBuffer(), :color => true)
89+
show(io, MIME("text/plain"), s)
90+
end
91+
end
92+
8393
end # module BenchmarkTools

src/execution.jl

Lines changed: 65 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ mutable struct Benchmark
2020
params::Parameters
2121
end
2222

23+
const SampleResult = Tuple{Float64,Float64,Int,Int}
24+
2325
params(b::Benchmark) = b.params
2426

2527
function loadparams!(b::Benchmark, params::Parameters, fields...)
@@ -106,24 +108,57 @@ end
106108
# Note that trials executed via `run` and `lineartrial` are always executed at top-level
107109
# scope, in order to allow transfer of locally-scoped variables into benchmark scope.
108110

109-
function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...)
111+
function _run(
112+
b::Benchmark,
113+
p::Parameters;
114+
verbose=false,
115+
pad="",
116+
warmup=true,
117+
capture_result=true,
118+
kwargs...,
119+
)
110120
params = Parameters(p; kwargs...)
111121
@assert params.seconds > 0.0 "time limit must be greater than 0.0"
122+
sample_ref = Ref{SampleResult}((0.0, 0.0, 0, 0))
112123
if warmup
113-
b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
124+
saved_evals = params.evals
125+
params.evals = 1
126+
b.samplefunc(b.quote_vals, params, sample_ref, nothing)
127+
warmup_allocs = sample_ref[][4]
128+
params.evals = saved_evals
129+
params.gctrial && warmup_allocs > 0 && gcscrub()
114130
end
115131
trial = Trial(params)
116-
params.gctrial && gcscrub()
117132
start_time = Base.time()
118-
s = b.samplefunc(b.quote_vals, params)
119-
push!(trial, s[1:(end - 1)]...)
120-
return_val = s[end]
133+
b.samplefunc(b.quote_vals, params, sample_ref, nothing)
134+
s = sample_ref[]
135+
push!(trial, s[1], s[2], s[3], s[4])
136+
sample_time_s = s[1] * params.evals / 1e9
137+
estimated_remaining = if sample_time_s > 0
138+
min(
139+
params.samples - 1,
140+
ceil(Int, (params.seconds - (Base.time() - start_time)) / sample_time_s),
141+
)
142+
else
143+
params.samples - 1
144+
end
145+
sizehint!(trial.times, 1 + estimated_remaining)
146+
sizehint!(trial.gctimes, 1 + estimated_remaining)
121147
iters = 2
122148
while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
123-
params.gcsample && gcscrub()
124-
push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
149+
params.gcsample && s[4] > 0 && gcscrub()
150+
b.samplefunc(b.quote_vals, params, sample_ref, nothing)
151+
s = sample_ref[]
152+
push!(trial, s[1], s[2], s[3], s[4])
125153
iters += 1
126154
end
155+
return_val = if capture_result
156+
result_ref = Ref{Any}()
157+
b.samplefunc(b.quote_vals, params, sample_ref, result_ref)
158+
result_ref[]
159+
else
160+
nothing
161+
end
127162
return trial, return_val
128163
end
129164

@@ -140,7 +175,7 @@ function Base.run(
140175
ndone=NaN,
141176
kwargs...,
142177
)
143-
return run_result(b, p; kwargs...)[1]
178+
return run_result(b, p; capture_result=false, kwargs...)[1]
144179
end
145180

146181
"""
@@ -182,18 +217,25 @@ function _lineartrial(b::Benchmark, p::Parameters=b.params; maxevals=RESOLUTION,
182217
params = Parameters(p; kwargs...)
183218
estimates = zeros(maxevals)
184219
completed = 0
220+
sample_ref = Ref{SampleResult}((0.0, 0.0, 0, 0))
185221
params.evals = 1
186-
b.samplefunc(b.quote_vals, params) #warmup sample
187-
params.gctrial && gcscrub()
222+
b.samplefunc(b.quote_vals, params, sample_ref, nothing)
223+
warmup_allocs = sample_ref[][4]
224+
params.gctrial && warmup_allocs > 0 && gcscrub()
188225
start_time = time()
226+
prev_allocs = warmup_allocs
189227
for evals in eachindex(estimates)
190-
params.gcsample && gcscrub()
228+
params.gcsample && prev_allocs > 0 && gcscrub()
191229
params.evals = evals
192-
estimates[evals] = first(b.samplefunc(b.quote_vals, params))
230+
b.samplefunc(b.quote_vals, params, sample_ref, nothing)
231+
s = sample_ref[]
232+
estimates[evals] = s[1]
233+
prev_allocs = s[4]
193234
completed += 1
194235
((time() - start_time) > params.seconds) && break
195236
end
196-
return estimates[1:completed]
237+
resize!(estimates, completed)
238+
return estimates
197239
end
198240

199241
function warmup(item; verbose::Bool=true)
@@ -605,7 +647,10 @@ function generate_benchmark_definition(
605647
$(core_body)
606648
end
607649
@noinline function $(samplefunc)(
608-
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
650+
$(Expr(:tuple, quote_vars...)),
651+
__params::$BenchmarkTools.Parameters,
652+
__sample_ref::Ref{$BenchmarkTools.SampleResult},
653+
__result_ref::Union{Ref{Any},Nothing},
609654
)
610655
$(setup)
611656
__evals = __params.evals
@@ -618,6 +663,9 @@ function generate_benchmark_definition(
618663
__sample_time = time_ns() - __start_time
619664
__gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start)
620665
$(teardown)
666+
if __result_ref !== nothing
667+
__result_ref[] = __return_val
668+
end
621669
__time = max((__sample_time / __evals) - __params.overhead, 0.001)
622670
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
623671
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
@@ -630,7 +678,8 @@ function generate_benchmark_definition(
630678
__evals,
631679
),
632680
)
633-
return __time, __gctime, __memory, __allocs, __return_val
681+
__sample_ref[] = (__time, __gctime, __memory, __allocs)
682+
return nothing
634683
end
635684
end,
636685
)

src/groups.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ function Base.summary(io::IO, group::BenchmarkGroup)
336336
return print(io, "$(length(group))-element BenchmarkGroup($(tagrepr(group.tags)))")
337337
end
338338

339-
function Base.show(io::IO, group::BenchmarkGroup)
339+
function Base.show(@nospecialize(io::IO), group::BenchmarkGroup)
340340
limit = get(io, :limit, true)
341341
if !(limit isa Bool)
342342
msg = (

src/trials.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ Base.show(io::IO, t::TrialEstimate) = _show(io, t)
371371
Base.show(io::IO, t::TrialRatio) = _show(io, t)
372372
Base.show(io::IO, t::TrialJudgement) = _show(io, t)
373373

374-
function Base.show(io::IO, ::MIME"text/plain", t::Trial)
374+
function Base.show(@nospecialize(io::IO), ::MIME"text/plain", t::Trial)
375375
pad = get(io, :pad, "")
376376
print(
377377
io,
@@ -578,7 +578,7 @@ function Base.show(io::IO, ::MIME"text/plain", t::Trial)
578578
return print(io, ".")
579579
end
580580

581-
function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate)
581+
function Base.show(@nospecialize(io::IO), ::MIME"text/plain", t::TrialEstimate)
582582
println(io, "BenchmarkTools.TrialEstimate: ")
583583
pad = get(io, :pad, "")
584584
println(io, pad, " time: ", prettytime(time(t)))
@@ -595,7 +595,7 @@ function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate)
595595
return print(io, pad, " allocs: ", allocs(t))
596596
end
597597

598-
function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio)
598+
function Base.show(@nospecialize(io::IO), ::MIME"text/plain", t::TrialRatio)
599599
println(io, "BenchmarkTools.TrialRatio: ")
600600
pad = get(io, :pad, "")
601601
println(io, pad, " time: ", time(t))
@@ -604,7 +604,7 @@ function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio)
604604
return print(io, pad, " allocs: ", allocs(t))
605605
end
606606

607-
function Base.show(io::IO, ::MIME"text/plain", t::TrialJudgement)
607+
function Base.show(@nospecialize(io::IO), ::MIME"text/plain", t::TrialJudgement)
608608
println(io, "BenchmarkTools.TrialJudgement: ")
609609
pad = get(io, :pad, "")
610610
print(io, pad, " time: ", prettydiff(time(ratio(t))), " => ")

test/ExecutionTests.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,10 @@ str = String(take!(io))
317317
b = @bprofile 1 + 1 gctrial = true
318318
Profile.print(IOContext(io, :displaysize => (24, 200)))
319319
str = String(take!(io))
320+
@test !occursin("gcscrub", str) # no allocs, so gcscrub is skipped even with gctrial=true
321+
b = @bprofile Ref(1) gctrial = true gcsample = true
322+
Profile.print(IOContext(io, :displaysize => (24, 200)))
323+
str = String(take!(io))
320324
@test occursin("gcscrub", str)
321325

322326
########
@@ -398,6 +402,15 @@ b = x = nothing
398402
GC.gc()
399403
@test x_finalized
400404

405+
# Ensure the harness itself doesn't allocate for a zero-allocation benchmark
406+
let b = @benchmarkable sin($(1))
407+
tune!(b)
408+
sample_ref = Ref{BenchmarkTools.SampleResult}((0.0, 0.0, 0, 0))
409+
b.samplefunc(b.quote_vals, b.params, sample_ref, nothing)
410+
s = sample_ref[]
411+
@test s[4] == 0 # allocs
412+
end
413+
401414
# Ensure mapvals(f) throws MethodError
402415
@test_throws MethodError BenchmarkTools.mapvals(max)
403416

0 commit comments

Comments
 (0)