Skip to content

Commit c8996d2

Browse files
Cache source reads during resolution (#16888)
## Summary If you have requirements files that are included multiple times, we can avoid going back to disk. This also guards against accidental repeated reads on standard input streams.
1 parent 2cdbf9e commit c8996d2

2 files changed

Lines changed: 98 additions & 37 deletions

File tree

crates/uv-requirements-txt/src/lib.rs

Lines changed: 80 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use std::io;
4040
use std::path::{Path, PathBuf};
4141
use std::str::FromStr;
4242

43-
use rustc_hash::FxHashSet;
43+
use rustc_hash::{FxHashMap, FxHashSet};
4444
use tracing::instrument;
4545
use unscanny::{Pattern, Scanner};
4646
use url::Url;
@@ -66,6 +66,9 @@ use crate::shquote::unquote;
6666
mod requirement;
6767
mod shquote;
6868

69+
/// A cache of file contents, keyed by path, to avoid re-reading files from disk.
70+
pub type SourceCache = FxHashMap<PathBuf, String>;
71+
6972
/// We emit one of those for each `requirements.txt` entry.
7073
enum RequirementsTxtStatement {
7174
/// `-r` inclusion filename
@@ -171,12 +174,39 @@ impl RequirementsTxt {
171174
requirements_txt: impl AsRef<Path>,
172175
working_dir: impl AsRef<Path>,
173176
client_builder: &BaseClientBuilder<'_>,
177+
) -> Result<Self, RequirementsTxtFileError> {
178+
Self::parse_with_cache(
179+
requirements_txt,
180+
working_dir,
181+
client_builder,
182+
&mut SourceCache::default(),
183+
)
184+
.await
185+
}
186+
187+
/// Parse a `requirements.txt` file, using the given cache to avoid re-reading files from disk.
188+
#[instrument(
189+
skip_all,
190+
fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str())
191+
)]
192+
pub async fn parse_with_cache(
193+
requirements_txt: impl AsRef<Path>,
194+
working_dir: impl AsRef<Path>,
195+
client_builder: &BaseClientBuilder<'_>,
196+
cache: &mut SourceCache,
174197
) -> Result<Self, RequirementsTxtFileError> {
175198
let mut visited = VisitedFiles::Requirements {
176199
requirements: &mut FxHashSet::default(),
177200
constraints: &mut FxHashSet::default(),
178201
};
179-
Self::parse_impl(requirements_txt, working_dir, client_builder, &mut visited).await
202+
Self::parse_impl(
203+
requirements_txt,
204+
working_dir,
205+
client_builder,
206+
&mut visited,
207+
cache,
208+
)
209+
.await
180210
}
181211

182212
/// See module level documentation
@@ -189,49 +219,64 @@ impl RequirementsTxt {
189219
working_dir: impl AsRef<Path>,
190220
client_builder: &BaseClientBuilder<'_>,
191221
visited: &mut VisitedFiles<'_>,
222+
cache: &mut SourceCache,
192223
) -> Result<Self, RequirementsTxtFileError> {
193224
let requirements_txt = requirements_txt.as_ref();
194225
let working_dir = working_dir.as_ref();
195226

196-
let content =
197-
if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://") {
198-
#[cfg(not(feature = "http"))]
199-
{
227+
let content = if let Some(content) = cache.get(requirements_txt) {
228+
// Use cached content if available.
229+
content.clone()
230+
} else if requirements_txt.starts_with("http://") | requirements_txt.starts_with("https://")
231+
{
232+
#[cfg(not(feature = "http"))]
233+
{
234+
return Err(RequirementsTxtFileError {
235+
file: requirements_txt.to_path_buf(),
236+
error: RequirementsTxtParserError::Io(io::Error::new(
237+
io::ErrorKind::InvalidInput,
238+
"Remote file not supported without `http` feature",
239+
)),
240+
});
241+
}
242+
243+
#[cfg(feature = "http")]
244+
{
245+
// Avoid constructing a client if network is disabled already
246+
if client_builder.is_offline() {
200247
return Err(RequirementsTxtFileError {
201248
file: requirements_txt.to_path_buf(),
202249
error: RequirementsTxtParserError::Io(io::Error::new(
203250
io::ErrorKind::InvalidInput,
204-
"Remote file not supported without `http` feature",
251+
format!(
252+
"Network connectivity is disabled, but a remote requirements file was requested: {}",
253+
requirements_txt.display()
254+
),
205255
)),
206256
});
207257
}
208258

209-
#[cfg(feature = "http")]
210-
{
211-
// Avoid constructing a client if network is disabled already
212-
if client_builder.is_offline() {
213-
return Err(RequirementsTxtFileError {
214-
file: requirements_txt.to_path_buf(),
215-
error: RequirementsTxtParserError::Io(io::Error::new(
216-
io::ErrorKind::InvalidInput,
217-
format!("Network connectivity is disabled, but a remote requirements file was requested: {}", requirements_txt.display()),
218-
)),
219-
});
220-
}
221-
222-
let client = client_builder.build();
223-
read_url_to_string(&requirements_txt, client).await
224-
}
225-
} else {
226-
// Ex) `file:///home/ferris/project/requirements.txt`
227-
uv_fs::read_to_string_transcode(&requirements_txt)
259+
let client = client_builder.build();
260+
let content = read_url_to_string(&requirements_txt, client)
228261
.await
229-
.map_err(RequirementsTxtParserError::Io)
262+
.map_err(|err| RequirementsTxtFileError {
263+
file: requirements_txt.to_path_buf(),
264+
error: err,
265+
})?;
266+
cache.insert(requirements_txt.to_path_buf(), content.clone());
267+
content
230268
}
231-
.map_err(|err| RequirementsTxtFileError {
232-
file: requirements_txt.to_path_buf(),
233-
error: err,
234-
})?;
269+
} else {
270+
// Ex) `file:///home/ferris/project/requirements.txt`
271+
let content = uv_fs::read_to_string_transcode(&requirements_txt)
272+
.await
273+
.map_err(|err| RequirementsTxtFileError {
274+
file: requirements_txt.to_path_buf(),
275+
error: RequirementsTxtParserError::Io(err),
276+
})?;
277+
cache.insert(requirements_txt.to_path_buf(), content.clone());
278+
content
279+
};
235280

236281
let requirements_dir = requirements_txt.parent().unwrap_or(working_dir);
237282
let data = Self::parse_inner(
@@ -241,6 +286,7 @@ impl RequirementsTxt {
241286
client_builder,
242287
requirements_txt,
243288
visited,
289+
cache,
244290
)
245291
.await
246292
.map_err(|err| RequirementsTxtFileError {
@@ -264,6 +310,7 @@ impl RequirementsTxt {
264310
client_builder: &BaseClientBuilder<'_>,
265311
requirements_txt: &Path,
266312
visited: &mut VisitedFiles<'_>,
313+
cache: &mut SourceCache,
267314
) -> Result<Self, RequirementsTxtParserError> {
268315
let mut s = Scanner::new(content);
269316

@@ -318,6 +365,7 @@ impl RequirementsTxt {
318365
working_dir,
319366
client_builder,
320367
visited,
368+
cache,
321369
))
322370
.await
323371
.map_err(|err| RequirementsTxtParserError::Subfile {
@@ -394,6 +442,7 @@ impl RequirementsTxt {
394442
working_dir,
395443
client_builder,
396444
&mut visited,
445+
cache,
397446
))
398447
.await
399448
.map_err(|err| RequirementsTxtParserError::Subfile {

crates/uv-requirements/src/specification.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ use uv_distribution_types::{
4545
use uv_fs::{CWD, Simplified};
4646
use uv_normalize::{ExtraName, PackageName, PipGroupName};
4747
use uv_pypi_types::PyProjectToml;
48-
use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement};
48+
use uv_requirements_txt::{RequirementsTxt, RequirementsTxtRequirement, SourceCache};
4949
use uv_scripts::{Pep723Error, Pep723Item, Pep723Script};
5050
use uv_warnings::warn_user;
5151

@@ -91,6 +91,16 @@ impl RequirementsSpecification {
9191
pub async fn from_source(
9292
source: &RequirementsSource,
9393
client_builder: &BaseClientBuilder<'_>,
94+
) -> Result<Self> {
95+
Self::from_source_with_cache(source, client_builder, &mut SourceCache::default()).await
96+
}
97+
98+
/// Read the requirements and constraints from a source, using a cache for file contents.
99+
#[instrument(skip_all, level = tracing::Level::DEBUG, fields(source = % source))]
100+
pub async fn from_source_with_cache(
101+
source: &RequirementsSource,
102+
client_builder: &BaseClientBuilder<'_>,
103+
cache: &mut SourceCache,
94104
) -> Result<Self> {
95105
Ok(match source {
96106
RequirementsSource::Package(requirement) => Self {
@@ -114,7 +124,8 @@ impl RequirementsSpecification {
114124
return Err(anyhow::anyhow!("File not found: `{}`", path.user_display()));
115125
}
116126

117-
let requirements_txt = RequirementsTxt::parse(path, &*CWD, client_builder).await?;
127+
let requirements_txt =
128+
RequirementsTxt::parse_with_cache(path, &*CWD, client_builder, cache).await?;
118129

119130
if requirements_txt == RequirementsTxt::default() {
120131
if path == Path::new("-") {
@@ -352,6 +363,7 @@ impl RequirementsSpecification {
352363
client_builder: &BaseClientBuilder<'_>,
353364
) -> Result<Self> {
354365
let mut spec = Self::default();
366+
let mut cache = SourceCache::default();
355367

356368
// Disallow `pylock.toml` files as constraints.
357369
if let Some(pylock_toml) = constraints.iter().find_map(|source| {
@@ -489,7 +501,7 @@ impl RequirementsSpecification {
489501
// Resolve sources into specifications so we know their `source_tree`.
490502
let mut requirement_sources = Vec::new();
491503
for source in requirements {
492-
let source = Self::from_source(source, client_builder).await?;
504+
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
493505
requirement_sources.push(source);
494506
}
495507

@@ -540,7 +552,7 @@ impl RequirementsSpecification {
540552
// Read all constraints, treating both requirements _and_ constraints as constraints.
541553
// Overrides are ignored.
542554
for source in constraints {
543-
let source = Self::from_source(source, client_builder).await?;
555+
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
544556
for entry in source.requirements {
545557
match entry.requirement {
546558
UnresolvedRequirement::Named(requirement) => {
@@ -578,7 +590,7 @@ impl RequirementsSpecification {
578590
// Read all overrides, treating both requirements _and_ overrides as overrides.
579591
// Constraints are ignored.
580592
for source in overrides {
581-
let source = Self::from_source(source, client_builder).await?;
593+
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
582594
spec.overrides.extend(source.requirements);
583595
spec.overrides.extend(source.overrides);
584596

@@ -601,7 +613,7 @@ impl RequirementsSpecification {
601613

602614
// Collect excludes.
603615
for source in excludes {
604-
let source = Self::from_source(source, client_builder).await?;
616+
let source = Self::from_source_with_cache(source, client_builder, &mut cache).await?;
605617
for req_spec in source.requirements {
606618
match req_spec.requirement {
607619
UnresolvedRequirement::Named(requirement) => {

0 commit comments

Comments
 (0)