1616const MAX_XZ_DICTSIZE : u32 = 128 * 1024 * 1024 ;
1717
1818use crate :: Context ;
19+ use anyhow:: Context as _;
1920use std:: convert:: TryFrom ;
2021use std:: fmt:: Write as FmtWrite ;
2122use std:: fs:: { self , File } ;
22- use std:: io:: { self , Read , Write } ;
23+ use std:: io:: { self , Read , Seek , Write } ;
2324use std:: path:: Path ;
2425use std:: time:: { Duration , Instant } ;
2526use xz2:: read:: XzDecoder ;
@@ -34,7 +35,7 @@ pub(crate) fn recompress_file(
3435 let file_start = Instant :: now ( ) ;
3536 let gz_path = xz_path. with_extension ( "gz" ) ;
3637
37- let mut in_file = File :: open ( xz_path) ?;
38+ let mut in_file = File :: open ( xz_path) . with_context ( || "failed to open XZ-compressed input" ) ?;
3839 let mut dec_buf = vec ! [ 0u8 ; 4 * 1024 * 1024 ] ;
3940 let mut compression_times = String :: new ( ) ;
4041
@@ -48,11 +49,11 @@ pub(crate) fn recompress_file(
4849 dec_measurements = Some ( decompress_and_write (
4950 & mut in_file,
5051 & mut dec_buf,
51- & mut [ & mut gz_encoder] ,
52+ & mut [ ( "gz" , & mut gz_encoder) ] ,
5253 & mut gz_duration,
5354 ) ?) ;
5455 let [ gz_duration] = gz_duration;
55- format_compression_time ( & mut compression_times, "gz" , gz_duration) ?;
56+ format_compression_time ( & mut compression_times, "gz" , gz_duration, None ) ?;
5657 } ;
5758
5859 // xz recompression with more aggressive settings than we want to take the time
@@ -71,13 +72,13 @@ pub(crate) fn recompress_file(
7172 Some ( ( _, size) ) => size,
7273 None => measure_compressed_file ( & mut in_file, & mut dec_buf) ?. 1 ,
7374 } ;
74- let in_size = u32:: try_from ( in_size) . unwrap_or ( u32:: MAX ) ;
75+ let dictsize = choose_xz_dictsize ( u32:: try_from ( in_size) . unwrap_or ( u32:: MAX ) ) ;
7576
7677 let mut filters = xz2:: stream:: Filters :: new ( ) ;
7778 let mut lzma_ops = xz2:: stream:: LzmaOptions :: new_preset ( 9 ) . unwrap ( ) ;
7879 // This sets the overall dictionary size, which is also how much memory (baseline)
7980 // is needed for decompression.
80- lzma_ops. dict_size ( choose_xz_dictsize ( in_size ) ) ;
81+ lzma_ops. dict_size ( dictsize ) ;
8182 // Use the best match finder for compression ratio.
8283 lzma_ops. match_finder ( xz2:: stream:: MatchFinder :: BinaryTree4 ) ;
8384 lzma_ops. mode ( xz2:: stream:: Mode :: Normal ) ;
@@ -105,11 +106,11 @@ pub(crate) fn recompress_file(
105106 dec_measurements = Some ( decompress_and_write (
106107 & mut in_file,
107108 & mut dec_buf,
108- & mut [ & mut xz_encoder] ,
109+ & mut [ ( "xz" , & mut xz_encoder) ] ,
109110 & mut xz_duration,
110111 ) ?) ;
111112 let [ xz_duration] = xz_duration;
112- format_compression_time ( & mut compression_times, "xz" , xz_duration) ?;
113+ format_compression_time ( & mut compression_times, "xz" , xz_duration, Some ( dictsize ) ) ?;
113114 }
114115
115116 drop ( in_file) ;
@@ -136,17 +137,20 @@ pub(crate) fn recompress_file(
136137/// durations and returns the total time taken by the decompressor and the total
137138/// size of the decompressed stream.
138139fn decompress_and_write (
139- src : & mut impl Read ,
140+ src : & mut ( impl Read + Seek ) ,
140141 buf : & mut [ u8 ] ,
141- destinations : & mut [ & mut dyn Write ] ,
142+ destinations : & mut [ ( & str , & mut dyn Write ) ] ,
142143 time_by_dst : & mut [ Duration ] ,
143- ) -> io:: Result < ( Duration , u64 ) > {
144+ ) -> anyhow:: Result < ( Duration , u64 ) > {
145+ src. rewind ( ) . with_context ( || "input file seek failed" ) ?;
144146 let mut decompressor = XzDecoder :: new ( src) ;
145147 let mut decompress_time = Duration :: ZERO ;
146148 let mut total_length = 0_u64 ;
147149 loop {
148150 let start = Instant :: now ( ) ;
149- let length = decompressor. read ( buf) ?;
151+ let length = decompressor
152+ . read ( buf)
153+ . with_context ( || "XZ decompression failed" ) ?;
150154 decompress_time += start. elapsed ( ) ;
151155 total_length += length as u64 ;
152156 if length == 0 {
@@ -155,9 +159,11 @@ fn decompress_and_write(
155159 // This code assumes that compression with `write_all` will never fail (i.e.,
156160 // we can take arbitrary amounts of data as input). That seems like a
157161 // reasonable assumption though.
158- for ( idx, destination) in destinations. iter_mut ( ) . enumerate ( ) {
162+ for ( idx, ( compname , destination) ) in destinations. iter_mut ( ) . enumerate ( ) {
159163 let start = std:: time:: Instant :: now ( ) ;
160- destination. write_all ( & buf[ ..length] ) ?;
164+ destination
165+ . write_all ( & buf[ ..length] )
166+ . with_context ( || format ! ( "{compname} compression failed" ) ) ?;
161167 time_by_dst[ idx] += start. elapsed ( ) ;
162168 }
163169 }
@@ -166,12 +172,33 @@ fn decompress_and_write(
166172
167173/// Calls `decompress_and_write` solely to measure the file's uncompressed size
168174/// and the time taken by decompression.
169- fn measure_compressed_file ( src : & mut impl Read , buf : & mut [ u8 ] ) -> io:: Result < ( Duration , u64 ) > {
175+ fn measure_compressed_file (
176+ src : & mut ( impl Read + Seek ) ,
177+ buf : & mut [ u8 ] ,
178+ ) -> anyhow:: Result < ( Duration , u64 ) > {
170179 decompress_and_write ( src, buf, & mut [ ] , & mut [ ] )
171180}
172181
173- fn format_compression_time ( out : & mut String , name : & str , duration : Duration ) -> std:: fmt:: Result {
174- write ! ( out, ", {:.2?} {} compression" , duration, name)
182+ fn format_compression_time (
183+ out : & mut String ,
184+ name : & str ,
185+ duration : Duration ,
186+ dictsize : Option < u32 > ,
187+ ) -> std:: fmt:: Result {
188+ write ! ( out, ", {:.2?} {} compression" , duration, name) ?;
189+ if let Some ( mut dictsize) = dictsize {
190+ let mut iprefix = 0 ;
191+ while iprefix < 2 && dictsize & 1023 == 0 {
192+ iprefix += 1 ;
193+ dictsize >>= 10 ;
194+ }
195+ write ! (
196+ out,
197+ " with {dictsize} {}B dictionary" ,
198+ [ "" , "Ki" , "Mi" ] [ iprefix]
199+ ) ?;
200+ }
201+ Ok ( ( ) )
175202}
176203
177204/// Chooses the smallest XZ dictionary size that is at least as large as the
@@ -275,7 +302,10 @@ impl Context {
275302 let path = to_recompress. lock ( ) . unwrap ( ) . pop ( ) ;
276303 path
277304 } {
278- recompress_file ( & xz_path, recompress_gz, compression_level, recompress_xz) ?;
305+ recompress_file ( & xz_path, recompress_gz, compression_level, recompress_xz)
306+ . with_context ( || {
307+ format ! ( "failed to recompress {}" , xz_path. display( ) )
308+ } ) ?;
279309 }
280310
281311 Ok :: < _ , anyhow:: Error > ( ( ) )
0 commit comments