Skip to content

Commit cad8128

Browse files
authored
Adaptively select XZ recompress dictionary size of up to 128 MiB (#97)
This increases peak RSS for users of Rustup by 64 MiB in exchange for non-negligible improvements in compression ratio for the larger tarballs:

```
# component    bytes_un    bytes_cur   bytes_128m  ratio
rust-docs  :   669916672    21485344    20294200   -5.543984%
rustc      :   386717696    82519204    76896156   -6.81423 %
llvm-tools :   194253312    39117832    36593820   -6.45233 %
rust-std   :   163678208    29115852    28910652   -0.70477 %
cargo      :    42116608    10679724    10679732   +0.000075%
rust-src   :    40181760     3473408     3473416   +0.00023 %
clippy     :    21029376     4544900     4544908   +0.00018 %
rustfmt    :     9690624     2255472     2255480   +0.00035 %
```

All tests were done on tarballs from `https://static.rust-lang.org/dist/2025-09-18/{component}-1.90.0-x86_64-unknown-linux-gnu.tar.xz`. The size of the compressed tarballs directly downloaded from static.rust-lang.org is shown in the `bytes_cur` column. `bytes_128m` is the size of the output of `xz -T1 --lzma=preset=9e,depth=1000,dict=128M`, which is the same configuration as what `prepare-release` does with the change this pull request makes. The version used is XZ Utils 5.8.1 from Arch Linux repositories. The `cargo`, `rust-src`, `clippy` and `rustfmt` components (all smaller than 128 MiB) appearing as having regressed by exactly 8 bytes is likely a mismatch between the compressor version information written by `xz` and that written by `prepare-release`, so the 8-byte increase will probably not show up in actuality. I have confirmed via GNU Time (`/bin/time -v`) that decompressor memory usage increases by no more than 64 MiB. Additionally, the recompressor now takes note of the size of the uncompressed file to avoid excessive dictionary sizes for components that are too small to benefit from the new 128 MiB maximum. This reduces memory usage, during both compression and decompression. As per XZ documentation, a dictionary size of the form `2^n` or `2^n + 2^(n-1)` is selected.
For files smaller than 128 MiB, the smallest possible one that meets or exceeds the size of the file (thus maximizing compression ratio) is chosen; beyond that, it is capped at 128 MiB.
1 parent a9aa871 commit cad8128

File tree

3 files changed

+165
-45
lines changed

3 files changed

+165
-45
lines changed

.cargo/config.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[build]
2+
# If a contributor has a shared target directory configured in their
3+
# ~/.cargo/config.toml, that setting has to be overridden to make sure the
4+
# container build can find the build output and put it into the container.
5+
# Not doing so results in a cryptic "no such file or directory" error in
6+
# run.sh.
7+
target-dir = "target"

rustfmt.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Empty to ensure that Rustfmt doesn't accidentally pick up some other unrelated
2+
# rustfmt.toml and apply the wrong formatting rules.

src/recompress.rs

Lines changed: 156 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,17 @@
1010
//! time, particularly for the xz outputs. In our infrastructure this runs on a 72 vCPU container to
1111
//! finish in a reasonable amount of time.
1212
13+
/// The maximum XZ dictionary size we're willing to choose. Rustup users will
14+
/// need at least this much free RAM to decompress the archive, and
15+
/// compression will require even more memory.
16+
const MAX_XZ_DICTSIZE: u32 = 128 * 1024 * 1024;
17+
1318
use crate::Context;
19+
use anyhow::Context as _;
20+
use std::convert::TryFrom;
1421
use std::fmt::Write as FmtWrite;
1522
use std::fs::{self, File};
16-
use std::io::{self, Read, Write};
23+
use std::io::{self, Read, Seek, Write};
1724
use std::path::Path;
1825
use std::time::{Duration, Instant};
1926
use xz2::read::XzDecoder;
@@ -28,16 +35,24 @@ pub(crate) fn recompress_file(
2835
let file_start = Instant::now();
2936
let gz_path = xz_path.with_extension("gz");
3037

31-
let mut destinations: Vec<(&str, Box<dyn io::Write>)> = Vec::new();
38+
let mut in_file = File::open(xz_path).with_context(|| "failed to open XZ-compressed input")?;
39+
let mut dec_buf = vec![0u8; 4 * 1024 * 1024];
40+
let mut compression_times = String::new();
41+
42+
let mut dec_measurements = None;
3243

3344
// Produce gzip if explicitly enabled or the destination file doesn't exist.
3445
if recompress_gz || !gz_path.is_file() {
35-
let gz = File::create(gz_path)?;
36-
destinations.push((
37-
"gz",
38-
Box::new(flate2::write::GzEncoder::new(gz, gz_compression_level)),
39-
));
40-
}
46+
let gz_out = File::create(gz_path)?;
47+
let mut gz_encoder = flate2::write::GzEncoder::new(gz_out, gz_compression_level);
48+
let mut gz_duration = Duration::ZERO;
49+
dec_measurements = Some(decompress_and_write(
50+
&mut in_file,
51+
&mut dec_buf,
52+
&mut [("gz", &mut gz_encoder, &mut gz_duration)],
53+
)?);
54+
format_compression_time(&mut compression_times, "gz", gz_duration, None)?;
55+
};
4156

4257
// xz recompression with more aggressive settings than we want to take the time
4358
// for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
@@ -51,11 +66,17 @@ pub(crate) fn recompress_file(
5166
// parallel.
5267
let xz_recompressed = xz_path.with_extension("xz_recompressed");
5368
if recompress_xz {
69+
let in_size = match dec_measurements {
70+
Some((_, size)) => size,
71+
None => measure_compressed_file(&mut in_file, &mut dec_buf)?.1,
72+
};
73+
let dictsize = choose_xz_dictsize(u32::try_from(in_size).unwrap_or(u32::MAX));
74+
5475
let mut filters = xz2::stream::Filters::new();
5576
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
5677
// This sets the overall dictionary size, which is also how much memory (baseline)
5778
// is needed for decompression.
58-
lzma_ops.dict_size(64 * 1024 * 1024);
79+
lzma_ops.dict_size(dictsize);
5980
// Use the best match finder for compression ratio.
6081
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
6182
lzma_ops.mode(xz2::stream::Mode::Normal);
@@ -76,61 +97,148 @@ pub(crate) fn recompress_file(
7697
// FIXME: Do we want a checksum as part of compression?
7798
let stream =
7899
xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None).unwrap();
100+
79101
let xz_out = File::create(&xz_recompressed)?;
80-
destinations.push((
81-
"xz",
82-
Box::new(xz2::write::XzEncoder::new_stream(
83-
std::io::BufWriter::new(xz_out),
84-
stream,
85-
)),
86-
));
102+
let mut xz_encoder = xz2::write::XzEncoder::new_stream(io::BufWriter::new(xz_out), stream);
103+
let mut xz_duration = Duration::ZERO;
104+
dec_measurements = Some(decompress_and_write(
105+
&mut in_file,
106+
&mut dec_buf,
107+
&mut [("xz", &mut xz_encoder, &mut xz_duration)],
108+
)?);
109+
format_compression_time(&mut compression_times, "xz", xz_duration, Some(dictsize))?;
87110
}
88111

89-
// We only decompress once and then write into each of the compressors before
90-
// moving on.
91-
//
92-
// This code assumes that compression with `write_all` will never fail (i.e., we
93-
// can take arbitrary amounts of data as input). That seems like a reasonable
94-
// assumption though.
95-
let mut decompressor = XzDecoder::new(File::open(xz_path)?);
96-
let mut buffer = vec![0u8; 4 * 1024 * 1024];
112+
drop(in_file);
113+
114+
print!(
115+
"recompressed {}: {:.2?} total",
116+
xz_path.display(),
117+
file_start.elapsed()
118+
);
119+
if let Some((decompress_time, _)) = dec_measurements {
120+
print!(" {:.2?} decompression", decompress_time);
121+
}
122+
println!("{}", compression_times);
123+
124+
if recompress_xz {
125+
fs::rename(&xz_recompressed, xz_path)?;
126+
}
127+
128+
Ok(())
129+
}
130+
131+
/// Decompresses the given XZ stream and sends it to the given set of destinations.
132+
/// Writes the time taken by each individual destination to the corresponding tuple
133+
/// and returns the total time taken by the decompressor and the total size of the
134+
/// decompressed stream.
135+
fn decompress_and_write(
136+
src: &mut (impl Read + Seek),
137+
buf: &mut [u8],
138+
destinations: &mut [(&str, &mut dyn Write, &mut Duration)],
139+
) -> anyhow::Result<(Duration, u64)> {
140+
src.rewind().with_context(|| "input file seek failed")?;
141+
let mut decompressor = XzDecoder::new(src);
97142
let mut decompress_time = Duration::ZERO;
98-
let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
143+
let mut total_length = 0_u64;
99144
loop {
100145
let start = Instant::now();
101-
let length = decompressor.read(&mut buffer)?;
146+
let length = decompressor
147+
.read(buf)
148+
.with_context(|| "XZ decompression failed")?;
102149
decompress_time += start.elapsed();
150+
total_length += length as u64;
103151
if length == 0 {
104152
break;
105153
}
106-
for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
154+
// This code assumes that compression with `write_all` will never fail (i.e.,
155+
// we can take arbitrary amounts of data as input). That seems like a
156+
// reasonable assumption though.
157+
for (compname, destination, duration) in destinations.iter_mut() {
107158
let start = std::time::Instant::now();
108-
destination.write_all(&buffer[..length])?;
109-
time_by_dest[idx] += start.elapsed();
159+
destination
160+
.write_all(&buf[..length])
161+
.with_context(|| format!("{compname} compression failed"))?;
162+
**duration += start.elapsed();
110163
}
111164
}
165+
Ok((decompress_time, total_length))
166+
}
112167

113-
let mut compression_times = String::new();
114-
for (idx, (name, _)) in destinations.iter().enumerate() {
168+
/// Calls `decompress_and_write` solely to measure the file's uncompressed size
169+
/// and the time taken by decompression.
170+
fn measure_compressed_file(
171+
src: &mut (impl Read + Seek),
172+
buf: &mut [u8],
173+
) -> anyhow::Result<(Duration, u64)> {
174+
decompress_and_write(src, buf, &mut [])
175+
}
176+
177+
fn format_compression_time(
178+
out: &mut String,
179+
name: &str,
180+
duration: Duration,
181+
dictsize: Option<u32>,
182+
) -> std::fmt::Result {
183+
write!(out, ", {:.2?} {} compression", duration, name)?;
184+
if let Some(mut dictsize) = dictsize {
185+
let mut iprefix = 0;
186+
// Divide by 1024 until the result would be inexact or we run out of prefixes.
187+
while iprefix < 2 && dictsize.is_multiple_of(1024) {
188+
iprefix += 1;
189+
dictsize /= 1024;
190+
}
115191
write!(
116-
compression_times,
117-
", {:.2?} {} compression",
118-
time_by_dest[idx], name
192+
out,
193+
" with {dictsize} {}B dictionary",
194+
["", "Ki", "Mi"][iprefix]
119195
)?;
120196
}
121-
println!(
122-
"recompressed {}: {:.2?} total, {:.2?} decompression{}",
123-
xz_path.display(),
124-
file_start.elapsed(),
125-
decompress_time,
126-
compression_times
127-
);
197+
Ok(())
198+
}
128199

129-
if recompress_xz {
130-
fs::rename(&xz_recompressed, xz_path)?;
200+
/// Chooses the smallest XZ dictionary size that is at least as large as the
201+
/// file and will not be rounded by XZ, clipping it to the range of acceptable
202+
/// dictionary sizes.
203+
///
204+
/// XZ's dictionary sizes are the sum of one or two powers of two. As such, this
205+
/// function amounts to finding for some `sz` the smallest integer `d` which
206+
/// upholds all of the following properties:
207+
/// - has the form `2^n` or `2^n + 2^(n-1)`
208+
/// - `d` ≥ minimum XZ dictionary size
209+
/// - `d` ≤ maximum XZ dictionary size
210+
/// - `d` ≥ `sz`, but only if `sz` ≤ maximum XZ dictionary size
211+
fn choose_xz_dictsize(mut sz: u32) -> u32 {
212+
/// XZ's minimum dictionary size, which is 4 KiB.
213+
const MIN_XZ_DICTSIZE: u32 = 4096;
214+
const {
215+
// This check is to prevent overflow further down the line
216+
// regardless of the value of MAX_XZ_DICTSIZE.
217+
assert!(
218+
MAX_XZ_DICTSIZE <= (1024 + 512) * 1024 * 1024,
219+
"XZ dictionary size only goes up to 1.5 GiB"
220+
);
221+
};
222+
sz = sz.clamp(MIN_XZ_DICTSIZE, MAX_XZ_DICTSIZE);
223+
if sz.is_power_of_two() {
224+
return sz;
131225
}
132226

133-
Ok(())
227+
// FIXME: u32::isolate_highest_one() once stable, https://github.com/rust-lang/rust/issues/136909.
228+
let hi_one = sz & (1_u32 << 31).wrapping_shr(sz.leading_zeros());
229+
230+
// For a bitstring of the form 01x…, check if 0110…0 (the 2^n + 2^(n-1) form) is
231+
// greater or equal. For example, for sz = 17M (16M + 1M), hi_one will be 16M and
232+
// twinbit_form will be 24M (16M + 8M) and the check will succeed, whereas for
233+
// sz = 25M (16M + 8M + 1M), twinbit_form will also be 24M (16M + 8M) and the check
234+
// will fail.
235+
let twinbit_form = hi_one | (hi_one >> 1);
236+
if twinbit_form >= sz {
237+
return twinbit_form;
238+
}
239+
240+
// Otherwise, we go for the next power of two.
241+
std::cmp::min(hi_one << 1, MAX_XZ_DICTSIZE)
134242
}
135243

136244
impl Context {
@@ -192,7 +300,10 @@ impl Context {
192300
let path = to_recompress.lock().unwrap().pop();
193301
path
194302
} {
195-
recompress_file(&xz_path, recompress_gz, compression_level, recompress_xz)?;
303+
recompress_file(&xz_path, recompress_gz, compression_level, recompress_xz)
304+
.with_context(|| {
305+
format!("failed to recompress {}", xz_path.display())
306+
})?;
196307
}
197308

198309
Ok::<_, anyhow::Error>(())

0 commit comments

Comments
 (0)