Skip to content

Commit

Permalink
Parse URLs lazily in resolver (#10259)
Browse files Browse the repository at this point in the history
## Summary

The idea here is to avoid parsing all registry URLs upfront, and instead
parse them when we need them.

Closes #6133.
  • Loading branch information
charliermarsh authored Jan 1, 2025
1 parent a65aebb commit 7bbec6b
Show file tree
Hide file tree
Showing 9 changed files with 70 additions and 89 deletions.
2 changes: 1 addition & 1 deletion crates/uv-client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl From<ErrorKind> for Error {
#[derive(Debug, thiserror::Error)]
pub enum ErrorKind {
#[error(transparent)]
UrlParse(#[from] url::ParseError),
InvalidUrl(#[from] uv_distribution_types::ToUrlError),

#[error(transparent)]
JoinRelativeUrl(#[from] uv_pypi_types::JoinRelativeError),
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ impl RegistryClient {
}
}
FileLocation::AbsoluteUrl(url) => {
let url = url.to_url();
let url = url.to_url().map_err(ErrorKind::InvalidUrl)?;
if url.scheme() == "file" {
let path = url
.to_file_path()
Expand Down
80 changes: 23 additions & 57 deletions crates/uv-distribution-types/src/file.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use std::borrow::Cow;
use std::fmt::{self, Display, Formatter};
use std::path::PathBuf;
use std::str::FromStr;

use jiff::Timestamp;
use serde::{Deserialize, Serialize};
use url::Url;

use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
use uv_pep508::VerbatimUrl;
use uv_pep508::split_scheme;
use uv_pypi_types::{CoreMetadata, HashDigest, Yanked};

/// Error converting [`uv_pypi_types::File`] to [`distribution_type::File`].
Expand Down Expand Up @@ -56,9 +54,9 @@ impl File {
.map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
size: file.size,
upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
url: match Url::parse(&file.url) {
Ok(url) => FileLocation::AbsoluteUrl(url.into()),
Err(_) => FileLocation::RelativeUrl(base.to_string(), file.url),
url: match split_scheme(&file.url) {
Some(..) => FileLocation::AbsoluteUrl(UrlString::new(file.url)),
None => FileLocation::RelativeUrl(base.to_string(), file.url),
},
yanked: file.yanked,
})
Expand Down Expand Up @@ -101,7 +99,7 @@ impl FileLocation {
})?;
Ok(joined)
}
FileLocation::AbsoluteUrl(ref absolute) => Ok(absolute.to_url()),
FileLocation::AbsoluteUrl(ref absolute) => absolute.to_url(),
}
}

Expand All @@ -128,7 +126,7 @@ impl Display for FileLocation {

/// A [`Url`] represented as a `String`.
///
/// This type is guaranteed to be a valid URL but avoids being parsed into the [`Url`] type.
/// This type is not guaranteed to be a valid URL, and may error on conversion.
#[derive(
Debug,
Clone,
Expand All @@ -148,10 +146,17 @@ impl Display for FileLocation {
pub struct UrlString(String);

impl UrlString {
/// Create a new [`UrlString`] from a [`String`].
pub fn new(url: String) -> Self {
Self(url)
}

/// Converts a [`UrlString`] to a [`Url`].
pub fn to_url(&self) -> Url {
// This conversion can never fail as the only way to construct a `UrlString` is from a `Url`.
Url::from_str(&self.0).unwrap()
pub fn to_url(&self) -> Result<Url, ToUrlError> {
Url::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute {
absolute: self.0.clone(),
err,
})
}

/// Return the [`UrlString`] with any query parameters and fragments removed.
Expand Down Expand Up @@ -194,42 +199,18 @@ impl From<&Url> for UrlString {
}
}

impl From<Cow<'_, Url>> for UrlString {
fn from(value: Cow<'_, Url>) -> Self {
UrlString(value.to_string())
}
}

impl From<VerbatimUrl> for UrlString {
fn from(value: VerbatimUrl) -> Self {
UrlString(value.raw().to_string())
}
}

impl From<&VerbatimUrl> for UrlString {
fn from(value: &VerbatimUrl) -> Self {
UrlString(value.raw().to_string())
}
}

impl From<UrlString> for String {
fn from(value: UrlString) -> Self {
value.0
}
}

impl Display for UrlString {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}

/// An error that occurs when a `FileLocation` is not a valid URL.
/// An error that occurs when a [`FileLocation`] is not a valid URL.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum ToUrlError {
/// An error that occurs when the base URL in `FileLocation::Relative`
/// An error that occurs when the base URL in [`FileLocation::Relative`]
/// could not be parsed as a valid URL.
#[error("could not parse base URL `{base}` as a valid URL")]
#[error("Could not parse base URL `{base}` as a valid URL")]
InvalidBase {
/// The base URL that could not be parsed as a valid URL.
base: String,
Expand All @@ -238,8 +219,8 @@ pub enum ToUrlError {
err: url::ParseError,
},
/// An error that occurs when the base URL could not be joined with
/// the relative path in a `FileLocation::Relative`.
#[error("could not join base URL `{base}` to relative path `{path}`")]
/// the relative path in a [`FileLocation::Relative`].
#[error("Could not join base URL `{base}` to relative path `{path}`")]
InvalidJoin {
/// The base URL that could not be parsed as a valid URL.
base: String,
Expand All @@ -249,31 +230,16 @@ pub enum ToUrlError {
#[source]
err: url::ParseError,
},
/// An error that occurs when the absolute URL in `FileLocation::Absolute`
/// An error that occurs when the absolute URL in [`FileLocation::Absolute`]
/// could not be parsed as a valid URL.
#[error("could not parse absolute URL `{absolute}` as a valid URL")]
#[error("Could not parse absolute URL `{absolute}` as a valid URL")]
InvalidAbsolute {
/// The absolute URL that could not be parsed as a valid URL.
absolute: String,
/// The underlying URL parse error.
#[source]
err: url::ParseError,
},
/// An error that occurs when the file path in `FileLocation::Path` is
/// not valid UTF-8. We need paths to be valid UTF-8 to be transformed
/// into URLs, which must also be UTF-8.
#[error("could not build URL from file path `{path}` because it is not valid UTF-8")]
PathNotUtf8 {
/// The original path that was not valid UTF-8.
path: PathBuf,
},
/// An error that occurs when the file URL created from a file path is not
/// a valid URL.
#[error("could not parse file path `{path}` as a valid URL")]
InvalidPath {
/// The file path URL that could not be parsed as a valid URL.
path: String,
},
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-distribution/src/distribution_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url(),
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};

// Create a cache entry for the wheel.
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-distribution/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ pub enum Error {
NoBuild,

// Network error
#[error("Failed to parse URL: {0}")]
Url(String, #[source] url::ParseError),
#[error("Expected an absolute path, but received: {}", _0.user_display())]
RelativePath(PathBuf),
#[error(transparent)]
InvalidUrl(#[from] uv_distribution_types::ToUrlError),
#[error(transparent)]
ParsedUrl(#[from] ParsedUrlError),
#[error(transparent)]
JoinRelativeUrl(#[from] uv_pypi_types::JoinRelativeError),
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url(),
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};

// If the URL is a file URL, use the local path directly.
Expand Down Expand Up @@ -263,7 +263,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
FileLocation::RelativeUrl(base, url) => {
uv_pypi_types::base_url_join_relative(base, url)?
}
FileLocation::AbsoluteUrl(url) => url.to_url(),
FileLocation::AbsoluteUrl(url) => url.to_url()?,
};

// If the URL is a file URL, use the local path directly.
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-requirements/src/upgrade.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ pub fn read_lock_requirements(
preferences.push(Preference::from_lock(package, install_path)?);

// Map each entry in the lockfile to a Git SHA.
if let Some(git_ref) = package.as_git_ref() {
if let Some(git_ref) = package.as_git_ref()? {
git.push(git_ref);
}
}
Expand Down
47 changes: 27 additions & 20 deletions crates/uv-resolver/src/lock/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1098,7 +1098,7 @@ impl Lock {
.into_iter()
.filter_map(|index| match index.url() {
IndexUrl::Pypi(_) | IndexUrl::Url(_) => {
Some(UrlString::from(index.url().redacted()))
Some(UrlString::from(index.url().redacted().as_ref()))
}
IndexUrl::Path(_) => None,
})
Expand Down Expand Up @@ -1814,7 +1814,7 @@ impl Package {
let filename: WheelFilename =
self.wheels[best_wheel_index].filename.clone();
let url = Url::from(ParsedArchiveUrl {
url: url.to_url(),
url: url.to_url().map_err(LockErrorKind::InvalidUrl)?,
subdirectory: direct.subdirectory.clone(),
ext: DistExtension::Wheel,
});
Expand Down Expand Up @@ -1946,7 +1946,7 @@ impl Package {
Source::Git(url, git) => {
// Remove the fragment and query from the URL; they're already present in the
// `GitSource`.
let mut url = url.to_url();
let mut url = url.to_url().map_err(LockErrorKind::InvalidUrl)?;
url.set_fragment(None);
url.set_query(None);

Expand Down Expand Up @@ -1976,7 +1976,7 @@ impl Package {
let DistExtension::Source(ext) = DistExtension::from_path(url.as_ref())? else {
return Ok(None);
};
let location = url.to_url();
let location = url.to_url().map_err(LockErrorKind::InvalidUrl)?;
let subdirectory = direct.subdirectory.as_ref().map(PathBuf::from);
let url = Url::from(ParsedArchiveUrl {
url: location.clone(),
Expand Down Expand Up @@ -2020,7 +2020,10 @@ impl Package {
url: FileLocation::AbsoluteUrl(file_url.clone()),
yanked: None,
});
let index = IndexUrl::from(VerbatimUrl::from_url(url.to_url()));

let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
));

let reg_dist = RegistrySourceDist {
name: self.id.name.clone(),
Expand Down Expand Up @@ -2262,7 +2265,9 @@ impl Package {
pub fn index(&self, root: &Path) -> Result<Option<IndexUrl>, LockError> {
match &self.id.source {
Source::Registry(RegistrySource::Url(url)) => {
let index = IndexUrl::from(VerbatimUrl::from_url(url.to_url()));
let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
));
Ok(Some(index))
}
Source::Registry(RegistrySource::Path(path)) => {
Expand Down Expand Up @@ -2291,16 +2296,16 @@ impl Package {
}

/// Returns the [`ResolvedRepositoryReference`] for the package, if it is a Git source.
pub fn as_git_ref(&self) -> Option<ResolvedRepositoryReference> {
pub fn as_git_ref(&self) -> Result<Option<ResolvedRepositoryReference>, LockError> {
match &self.id.source {
Source::Git(url, git) => Some(ResolvedRepositoryReference {
Source::Git(url, git) => Ok(Some(ResolvedRepositoryReference {
reference: RepositoryReference {
url: RepositoryUrl::new(&url.to_url()),
url: RepositoryUrl::new(&url.to_url().map_err(LockErrorKind::InvalidUrl)?),
reference: GitReference::from(git.kind.clone()),
},
sha: git.precise,
}),
_ => None,
})),
_ => Ok(None),
}
}
}
Expand Down Expand Up @@ -3138,7 +3143,7 @@ impl SourceDist {
match &reg_dist.index {
IndexUrl::Pypi(_) | IndexUrl::Url(_) => {
let url = normalize_file_location(&reg_dist.file.url)
.map_err(LockErrorKind::InvalidFileUrl)
.map_err(LockErrorKind::InvalidUrl)
.map_err(LockError::from)?;
let hash = reg_dist.file.hashes.iter().max().cloned().map(Hash::from);
let size = reg_dist.file.size;
Expand All @@ -3153,7 +3158,7 @@ impl SourceDist {
.file
.url
.to_url()
.map_err(LockErrorKind::InvalidFileUrl)?
.map_err(LockErrorKind::InvalidUrl)?
.to_file_path()
.map_err(|()| LockErrorKind::UrlToPath)?;
let path = relative_to(&reg_dist_path, index_path)
Expand Down Expand Up @@ -3444,7 +3449,7 @@ impl Wheel {
match &wheel.index {
IndexUrl::Pypi(_) | IndexUrl::Url(_) => {
let url = normalize_file_location(&wheel.file.url)
.map_err(LockErrorKind::InvalidFileUrl)
.map_err(LockErrorKind::InvalidUrl)
.map_err(LockError::from)?;
let hash = wheel.file.hashes.iter().max().cloned().map(Hash::from);
let size = wheel.file.size;
Expand All @@ -3461,7 +3466,7 @@ impl Wheel {
.file
.url
.to_url()
.map_err(LockErrorKind::InvalidFileUrl)?
.map_err(LockErrorKind::InvalidUrl)?
.to_file_path()
.map_err(|()| LockErrorKind::UrlToPath)?;
let path = relative_to(&wheel_path, index_path)
Expand Down Expand Up @@ -3507,7 +3512,7 @@ impl Wheel {
let filename: WheelFilename = self.filename.clone();

match source {
RegistrySource::Url(index_url) => {
RegistrySource::Url(url) => {
let file_url = match &self.url {
WheelWireSource::Url { url } => url,
WheelWireSource::Path { .. } | WheelWireSource::Filename { .. } => {
Expand All @@ -3528,7 +3533,9 @@ impl Wheel {
url: FileLocation::AbsoluteUrl(file_url.clone()),
yanked: None,
});
let index = IndexUrl::from(VerbatimUrl::from_url(index_url.to_url()));
let index = IndexUrl::from(VerbatimUrl::from_url(
url.to_url().map_err(LockErrorKind::InvalidUrl)?,
));
Ok(RegistryBuiltWheel {
filename,
file,
Expand Down Expand Up @@ -4094,11 +4101,11 @@ enum LockErrorKind {
},
/// An error that occurs when the URL to a file for a wheel or
/// source dist could not be converted to a structured `url::Url`.
#[error("Failed to parse wheel or source distribution URL")]
InvalidFileUrl(
#[error(transparent)]
InvalidUrl(
/// The underlying error that occurred. This includes the
/// errant URL in its error message.
#[source]
#[from]
ToUrlError,
),
/// An error that occurs when the extension can't be determined
Expand Down
Loading

0 comments on commit 7bbec6b

Please sign in to comment.