Skip to content

Commit

Permalink
Downloader support resume from connection reset (#9422)
Browse files Browse the repository at this point in the history
  • Loading branch information
austinzh authored Aug 17, 2024
1 parent d1650fa commit fe3641e
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 16 deletions.
13 changes: 13 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,19 @@ values, usage instructions and warnings.

Use parallel execution when using the new (`>=1.1.0`) installer.

### `requests.max-retries`

**Type**: `int`

**Default**: `0`

**Environment Variable**: `POETRY_REQUESTS_MAX_RETRIES`

*Introduced in 1.9.0*

Set the maximum number of retries in an unstable network.
This setting has no effect if the server does not support HTTP range requests.

### `solver.lazy-wheel`

**Type**: `boolean`
Expand Down
8 changes: 7 additions & 1 deletion src/poetry/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ class Config:
"experimental": {
"system-git-client": False,
},
"requests": {
"max-retries": 0,
},
"installer": {
"parallel": True,
"max-workers": None,
Expand Down Expand Up @@ -316,7 +319,10 @@ def _get_normalizer(name: str) -> Callable[[str], Any]:
if name == "virtualenvs.path":
return lambda val: str(Path(val))

if name == "installer.max-workers":
if name in {
"installer.max-workers",
"requests.max-retries",
}:
return int_normalizer

if name in ["installer.no-binary", "installer.only-binary"]:
Expand Down
1 change: 1 addition & 0 deletions src/poetry/console/commands/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def unique_config_values(self) -> dict[str, tuple[Any, Any]]:
"virtualenvs.prefer-active-python": (boolean_validator, boolean_normalizer),
"virtualenvs.prompt": (str, str),
"experimental.system-git-client": (boolean_validator, boolean_normalizer),
"requests.max-retries": (lambda val: int(val) >= 0, int_normalizer),
"installer.parallel": (boolean_validator, boolean_normalizer),
"installer.max-workers": (lambda val: int(val) > 0, int_normalizer),
"installer.no-binary": (
Expand Down
5 changes: 4 additions & 1 deletion src/poetry/installation/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(
# Cache whether decorated output is supported.
# https://github.com/python-poetry/cleo/issues/423
self._decorated_output: bool = self._io.output.is_decorated()
self._max_retries = config.get("requests.max-retries", 0)

@property
def installations_count(self) -> int:
Expand Down Expand Up @@ -719,7 +720,9 @@ def _download_archive(
url: str,
dest: Path,
) -> None:
downloader = Downloader(url, dest, self._authenticator)
downloader = Downloader(
url, dest, self._authenticator, max_retries=self._max_retries
)
wheel_size = downloader.total_size

operation_message = self.get_operation_message(operation)
Expand Down
7 changes: 6 additions & 1 deletion src/poetry/packages/direct_origin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from poetry.core.packages.utils.link import Link

from poetry.config.config import Config
from poetry.inspection.info import PackageInfo
from poetry.inspection.info import PackageInfoError
from poetry.utils.authenticator import get_default_authenticator
Expand Down Expand Up @@ -57,6 +58,8 @@ def _get_package_from_git(
class DirectOrigin:
def __init__(self, artifact_cache: ArtifactCache) -> None:
self._artifact_cache = artifact_cache
config = Config.create()
self._max_retries = config.get("requests.max-retries", 0)
self._authenticator = get_default_authenticator()

@classmethod
Expand All @@ -77,7 +80,9 @@ def get_package_from_directory(cls, directory: Path) -> Package:
return PackageInfo.from_directory(path=directory).to_package(root_dir=directory)

def _download_file(self, url: str, dest: Path) -> None:
download_file(url, dest, session=self._authenticator)
download_file(
url, dest, session=self._authenticator, max_retries=self._max_retries
)

def get_package_from_url(self, url: str) -> Package:
link = Link(url)
Expand Down
7 changes: 6 additions & 1 deletion src/poetry/repositories/http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(
self.get_page = functools.lru_cache(maxsize=None)(self._get_page)

self._lazy_wheel = config.get("solver.lazy-wheel", True)
self._max_retries = config.get("requests.max-retries", 0)
# We are tracking if a domain supports range requests or not to avoid
# unnecessary requests.
# ATTENTION: A domain might support range requests only for some files, so the
Expand Down Expand Up @@ -95,7 +96,11 @@ def _download(
self, url: str, dest: Path, *, raise_accepts_ranges: bool = False
) -> None:
return download_file(
url, dest, session=self.session, raise_accepts_ranges=raise_accepts_ranges
url,
dest,
session=self.session,
raise_accepts_ranges=raise_accepts_ranges,
max_retries=self._max_retries,
)

@contextmanager
Expand Down
55 changes: 45 additions & 10 deletions src/poetry/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from typing import Any
from typing import overload

from requests.exceptions import ChunkedEncodingError
from requests.exceptions import ConnectionError
from requests.utils import atomic_open

from poetry.utils.authenticator import get_default_authenticator
Expand All @@ -33,6 +35,7 @@
from types import TracebackType

from poetry.core.packages.package import Package
from requests import Response
from requests import Session

from poetry.utils.authenticator import Authenticator
Expand Down Expand Up @@ -133,10 +136,11 @@ def download_file(
session: Authenticator | Session | None = None,
chunk_size: int = 1024,
raise_accepts_ranges: bool = False,
max_retries: int = 0,
) -> None:
from poetry.puzzle.provider import Indicator

downloader = Downloader(url, dest, session)
downloader = Downloader(url, dest, session, max_retries=max_retries)

if raise_accepts_ranges and downloader.accepts_ranges:
raise HTTPRangeRequestSupported(f"URL {url} supports range requests.")
Expand Down Expand Up @@ -168,16 +172,13 @@ def __init__(
url: str,
dest: Path,
session: Authenticator | Session | None = None,
max_retries: int = 0,
):
self._dest = dest

session = session or get_default_authenticator()
headers = {"Accept-Encoding": "Identity"}

self._response = session.get(
url, stream=True, headers=headers, timeout=REQUESTS_TIMEOUT
)
self._response.raise_for_status()
self._max_retries = max_retries
self._session = session or get_default_authenticator()
self._url = url
self._response = self._get()

@cached_property
def accepts_ranges(self) -> bool:
Expand All @@ -191,10 +192,44 @@ def total_size(self) -> int:
total_size = int(self._response.headers["Content-Length"])
return total_size

def _get(self, start: int = 0) -> Response:
headers = {"Accept-Encoding": "Identity"}
if start > 0:
headers["Range"] = f"bytes={start}-"
response = self._session.get(
self._url, stream=True, headers=headers, timeout=REQUESTS_TIMEOUT
)
response.raise_for_status()
return response

def _iter_content_with_resume(self, chunk_size: int) -> Iterator[bytes]:
fetched_size = 0
retries = 0
while True:
try:
for chunk in self._response.iter_content(chunk_size=chunk_size):
yield chunk
fetched_size += len(chunk)
except (ChunkedEncodingError, ConnectionError):
if (
retries < self._max_retries
and self.accepts_ranges
and fetched_size > 0
):
# only retry if server supports byte ranges
# and we have fetched at least one chunk
# otherwise, we should just fail
retries += 1
self._response = self._get(fetched_size)
continue
raise
else:
break

def download_with_progress(self, chunk_size: int = 1024) -> Iterator[int]:
fetched_size = 0
with atomic_open(self._dest) as f:
for chunk in self._response.iter_content(chunk_size=chunk_size):
for chunk in self._iter_content_with_resume(chunk_size=chunk_size):
if chunk:
f.write(chunk)
fetched_size += len(chunk)
Expand Down
7 changes: 6 additions & 1 deletion tests/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ def get_options_based_on_normalizer(normalizer: Normalizer) -> Iterator[str]:


@pytest.mark.parametrize(
("name", "value"), [("installer.parallel", True), ("virtualenvs.create", True)]
("name", "value"),
[
("installer.parallel", True),
("virtualenvs.create", True),
("requests.max-retries", 0),
],
)
def test_config_get_default_value(config: Config, name: str, value: bool) -> None:
assert config.get(name) is value
Expand Down
6 changes: 6 additions & 0 deletions tests/console/commands/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def test_list_displays_default_value_if_not_set(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down Expand Up @@ -92,6 +93,7 @@ def test_list_displays_set_get_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = false
virtualenvs.in-project = null
Expand Down Expand Up @@ -145,6 +147,7 @@ def test_unset_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down Expand Up @@ -176,6 +179,7 @@ def test_unset_repo_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down Expand Up @@ -305,6 +309,7 @@ def test_list_displays_set_get_local_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = false
virtualenvs.in-project = null
Expand Down Expand Up @@ -345,6 +350,7 @@ def test_list_must_not_display_sources_from_pyproject_toml(
installer.parallel = true
keyring.enabled = true
repositories.foo.url = "https://foo.bar/simple/"
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down
6 changes: 5 additions & 1 deletion tests/repositories/test_http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ def test_get_info_from_wheel(
else:
mock_metadata_from_wheel_url.assert_not_called()
mock_download.assert_called_once_with(
url, mocker.ANY, session=repo.session, raise_accepts_ranges=lazy_wheel
url,
mocker.ANY,
session=repo.session,
raise_accepts_ranges=lazy_wheel,
max_retries=0,
)
if lazy_wheel:
assert repo._supports_range_requests[domain] is False
Expand Down
74 changes: 74 additions & 0 deletions tests/utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest

from poetry.core.utils.helpers import parse_requires
from requests.exceptions import ChunkedEncodingError

from poetry.utils.helpers import Downloader
from poetry.utils.helpers import HTTPRangeRequestSupported
Expand Down Expand Up @@ -150,6 +151,79 @@ def test_download_file(
assert http.last_request().headers["Accept-Encoding"] == "Identity"


def test_download_file_recover_from_error(
http: type[httpretty], fixture_dir: FixtureDirGetter, tmp_path: Path
) -> None:
file_path = fixture_dir("distributions") / "demo-0.1.0.tar.gz"
file_body = file_path.read_bytes()
file_length = len(file_body)
url = "https://foo.com/demo-0.1.0.tar.gz"

def handle_request(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> tuple[int, dict[str, Any], bytes]:
if request.headers.get("Range") is None:
response_headers["Content-Length"] = str(file_length)
response_headers["Accept-Ranges"] = "bytes"
return 200, response_headers, file_body[: file_length // 2]
else:
start = int(
request.headers.get("Range", "bytes=0-").split("=")[1].split("-")[0]
)
return 206, response_headers, file_body[start:]

http.register_uri(http.GET, url, body=handle_request)
dest = tmp_path / "demo-0.1.0.tar.gz"

download_file(url, dest, chunk_size=file_length // 2, max_retries=1)

expect_sha_256 = "9fa123ad707a5c6c944743bf3e11a0e80d86cb518d3cf25320866ca3ef43e2ad"
assert get_file_hash(dest) == expect_sha_256
assert http.last_request().headers["Accept-Encoding"] == "Identity"
assert http.last_request().headers["Range"] == f"bytes={file_length // 2}-"


def test_download_file_fail_when_no_range(
http: type[httpretty], fixture_dir: FixtureDirGetter, tmp_path: Path
) -> None:
file_path = fixture_dir("distributions") / "demo-0.1.0.tar.gz"
file_body = file_path.read_bytes()
file_length = len(file_body)
url = "https://foo.com/demo-0.1.0.tar.gz"

def handle_request(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> tuple[int, dict[str, Any], bytes]:
response_headers["Content-Length"] = str(file_length)
return 200, response_headers, file_body[: file_length // 2]

http.register_uri(http.GET, url, body=handle_request)
dest = tmp_path / "demo-0.1.0.tar.gz"
with pytest.raises(ChunkedEncodingError):
download_file(url, dest, chunk_size=file_length // 2, max_retries=1)


def test_download_file_fail_when_first_chunk_failed(
http: type[httpretty], fixture_dir: FixtureDirGetter, tmp_path: Path
) -> None:
file_path = fixture_dir("distributions") / "demo-0.1.0.tar.gz"
file_body = file_path.read_bytes()
file_length = len(file_body)
url = "https://foo.com/demo-0.1.0.tar.gz"

def handle_request(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> tuple[int, dict[str, Any], bytes]:
response_headers["Content-Length"] = str(file_length)
response_headers["Accept-Ranges"] = "bytes"
return 200, response_headers, file_body[: file_length // 2]

http.register_uri(http.GET, url, body=handle_request)
dest = tmp_path / "demo-0.1.0.tar.gz"
with pytest.raises(ChunkedEncodingError):
download_file(url, dest, chunk_size=file_length, max_retries=1)


@pytest.mark.parametrize(
"hash_types,expected",
[
Expand Down

0 comments on commit fe3641e

Please sign in to comment.