Skip to content

Commit

Permalink
WIP: More s3 testing
Browse files Browse the repository at this point in the history
  • Loading branch information
MHendricks committed Jan 18, 2025
1 parent 66629c7 commit e0c3980
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 39 deletions.
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ def generate(cls, root, versions=None, zip_created=None, zip_root=None):
filename = root / f"{name}.zip"
ver = version.version if version.inc_version else None
with ZipFile(filename, "w") as zf:
# Make the .zip file larger than the remotezip initial_buffer_size
# so testing of partial archive reading is forced to use multiple requests
zf.writestr("data.txt", "-" * 64 * 1024)
zf.writestr(
".hab.json",
cls.hab_json(version.name, version=ver, distros=version.distros),
Expand Down
245 changes: 206 additions & 39 deletions tests/test_distro_finder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import glob
import logging
import os
import sys
from pathlib import Path

import pytest
Expand Down Expand Up @@ -75,43 +76,6 @@ def test_glob_path(config_root, glob_str, count):
assert len(result) == count


class TestLoadPath:
"""Test the various `DistroFinder.load_path` implementations."""

def test_distro_finder(self, uncached_resolver):
"""Currently load_path for DistroFinder just returns None."""
finder = distro_finder.DistroFinder("", uncached_resolver.site)
assert finder.load_path(Path(".")) is None

def test_zip_sidecar(self, zip_distro_sidecar):
"""The Zip Sidecar reads a .json file next to the zip distro.
Ensure it's able to read data from the .json file.
"""
finder = zip_sidecar.DistroFinderZipSidecar(zip_distro_sidecar.root)

# This distro hard codes the version inside the .json file
data = finder.load_path(zip_distro_sidecar.root / "dist_a_v0.1.hab.json")
assert data["name"] == "dist_a"
assert "distros" not in data
assert data["version"] == "0.1"

# Test a different distro that doesn't hard code the version
data = finder.load_path(zip_distro_sidecar.root / "dist_b_v0.5.hab.json")
assert data["name"] == "dist_b"
assert "distros" not in data
assert data["version"] == "0.5"

# This distro includes required distros
data = finder.load_path(zip_distro_sidecar.root / "dist_a_v0.2.hab.json")
assert data["name"] == "dist_a"
assert data["distros"] == ["dist_b"]
assert data["version"] == "0.2"

def test_s3(self):
pass


class CheckDistroFinder:
distro_finder_cls = distro_finder.DistroFinder
site_template = "site_distro_finder.json"
Expand Down Expand Up @@ -180,19 +144,54 @@ def test_content(self, distro_finder_info):
result = finder.content(path)
assert result == distro_finder_info.root

def test_load_path(self, uncached_resolver):
"""Currently load_path for DistroFinder just returns None."""
finder = distro_finder.DistroFinder("", uncached_resolver.site)
assert finder.load_path(Path(".")) is None

def test_installed(self, distro_finder_info, helpers, tmp_path):
self.check_installed(distro_finder_info, helpers, tmp_path)

def test_install(self, distro_finder_info, helpers, tmp_path):
self.check_install(distro_finder_info, helpers, tmp_path)

def test_clear_cache(self, distro_finder_info):
"""Cover the clear_cache function, which for this class does nothing."""
finder = self.distro_finder_cls(distro_finder_info.root)
finder.clear_cache()


class TestZipSidecar(CheckDistroFinder):
"""Tests specific to `DistroFinderZipSidecar`."""

distro_finder_cls = zip_sidecar.DistroFinderZipSidecar
site_template = "site_distro_zip_sidecar.json"

def test_load_path(self, zip_distro_sidecar):
"""The Zip Sidecar reads a .json file next to the zip distro.
Ensure it's able to read data from the .json file.
"""
finder = self.distro_finder_cls(zip_distro_sidecar.root)

# This distro hard codes the version inside the .json file
data = finder.load_path(zip_distro_sidecar.root / "dist_a_v0.1.hab.json")
assert data["name"] == "dist_a"
assert "distros" not in data
assert data["version"] == "0.1"

# Test a different distro that doesn't hard code the version
data = finder.load_path(zip_distro_sidecar.root / "dist_b_v0.5.hab.json")
assert data["name"] == "dist_b"
assert "distros" not in data
assert data["version"] == "0.5"

# This distro includes required distros
data = finder.load_path(zip_distro_sidecar.root / "dist_a_v0.2.hab.json")
assert data["name"] == "dist_a"
assert data["distros"] == ["dist_b"]
assert data["version"] == "0.2"

def test_installed(self, zip_distro_sidecar, helpers, tmp_path):
self.check_installed(zip_distro_sidecar, helpers, tmp_path)

Expand Down Expand Up @@ -238,7 +237,7 @@ def test_load_path(self, zip_distro):
Ensure it's able to read data from the .json file.
"""
finder = df_zip.DistroFinderZip(zip_distro.root)
finder = self.distro_finder_cls(zip_distro.root)

# This distro hard codes the version inside the .json file
data = finder.load_path(zip_distro.root / "dist_a_v0.1.zip")
Expand Down Expand Up @@ -293,24 +292,132 @@ def test_installed(self, zip_distro, helpers, tmp_path):
def test_install(self, zip_distro, helpers, tmp_path):
self.check_install(zip_distro, helpers, tmp_path)

def test_clear_cache(self, distro_finder_info):
"""Test the clear_cache function for this class."""
finder = self.distro_finder_cls(distro_finder_info.root)
finder._cache["test"] = "case"
finder.clear_cache()
assert finder._cache == {}


# These tests only work if using the `pyXX-s3` tox testing env
@pytest.mark.skipif(
not os.getenv("VIRTUAL_ENV", "").endswith("-s3"),
reason="not testing optional s3 cloud",
)
class TestS3(CheckDistroFinder):
"""Tests specific to `DistroFinderS3Zip`."""
"""Tests specific to `DistroFinderS3Zip`.
Note: All tests should use the `zip_distro_s3` fixture. This ensures that
any s3 requests are local and also speeds up the test.
"""

site_template = "site_distro_s3.json"

class ServerSimulator:
"""Requests server used for testing downloading a partial zip file.
Based on remotezip test code:
https://github.com/gtsystem/python-remotezip/blob/master/test_remotezip.py
"""

def __init__(self, fname):
self._fname = fname
self.requested_ranges = []

def serve(self, request, context):
import remotezip

from_byte, to_byte = remotezip.RemoteFetcher.parse_range_header(
request.headers["Range"]
)
self.requested_ranges.append((from_byte, to_byte))

with open(self._fname, "rb") as f:
if from_byte < 0:
f.seek(0, 2)
size = f.tell()
f.seek(max(size + from_byte, 0), 0)
init_pos = f.tell()
content = f.read(min(size, -from_byte))
else:
f.seek(from_byte, 0)
init_pos = f.tell()
content = f.read(to_byte - from_byte + 1)

context.headers[
"Content-Range"
] = remotezip.RemoteFetcher.build_range_header(
init_pos, init_pos + len(content)
)
return content

@property
def distro_finder_cls(self):
"""Only import this class if the test is not skipped."""
from hab.distro_finders.s3_zip import DistroFinderS3Zip

return DistroFinderS3Zip

def test_load_path(self, zip_distro_s3, helpers, tmp_path, requests_mock):
"""Simulate reading only part of a remote zip file hosted in an aws s3 bucket.
This doesn't actually connect to an aws s3 bucket, it uses mock libraries
to simulate the process.
"""
import boto3

if sys.version_info.minor <= 7:
# NOTE: boto3 has dropped python 3.7. Moto changed their context name
# when they dropped support for python 3.7.
from moto import mock_s3 as mock_aws
else:
from moto import mock_aws

# Make requests connect to a simulated s3 server that supports the range header
server = self.ServerSimulator(
zip_distro_s3.zip_root / "hab-test-bucket" / "dist_a_v0.1.zip"
)
requests_mock.register_uri(
"GET",
"s3://hab-test-bucket/dist_a_v0.1.zip",
content=server.serve,
status_code=200,
)

# Create a mock aws setup using moto to test the authorization code
resolver = self.create_resolver(zip_distro_s3.zip_root, helpers, tmp_path)
dl_finder = resolver.site.downloads["distros"][0]

with mock_aws():
# The LocalS3Client objects don't have all of the s3 properties we
# require for configuring requests auth. Add them and create the bucket.
sess = boto3.Session(region_name="us-east-2")
conn = boto3.resource("s3", region_name="us-east-2")
conn.create_bucket(
Bucket="hab-test-bucket",
CreateBucketConfiguration={"LocationConstraint": "us-east-2"},
)
dl_finder.client.sess = sess
dl_finder.client.client = sess.client("s3", region_name="us-east-2")

# Test reading .hab.json from inside a remote .zip file.
dl_finder = resolver.site.downloads["distros"][0]
zip_path = dl_finder.root / "dist_a_v0.1.zip"
archive = dl_finder.archive(zip_path)

# Check that the filename property is always populated
assert str(archive.filename) == str(zip_path)

# Check that we were able to read the data from the archive
data = dl_finder.load_path(zip_path / ".hab.json")
assert data["name"] == "dist_a"
assert data["version"] == "0.1"

# Verify that remotezip had to make more than one request. This is because
# the .zip file is larger than `initial_buffer_size`.
assert len(server.requested_ranges) == 2

def test_installed(self, zip_distro_s3, helpers, tmp_path):
self.check_installed(zip_distro_s3, helpers, tmp_path)

Expand All @@ -334,6 +441,66 @@ def test_client(self, zip_distro_s3, helpers, tmp_path):
finder.client = "A custom client"
assert finder.client == "A custom client"

# Test init with a custom client
from cloudpathlib.local import LocalS3Client

client = LocalS3Client()
finder = self.distro_finder_cls("s3://hab-test-bucket", client=client)
assert finder.client == client

def test_as_posix(self, zip_distro_s3):
"""Cloudpathlib doesn't support `as_posix`, so a simple str is returned."""
# Test that as_posix for CloudPath's returns the CloudPath as a str
finder = self.distro_finder_cls("s3://hab-test-bucket")
assert finder.as_posix() == "s3://hab-test-bucket"

# Otherwise it returns a standard pathlib.Path.as_posix value
finder.root = zip_distro_s3.root
assert finder.as_posix() == zip_distro_s3.root.as_posix()

def test_clear_cache(self, zip_distro_s3):
"""Test the clear_cache function for this class."""

class Archive:
"""Simulated ZipFile class to test that open archives get closed."""

def __init__(self):
self.is_open = True

def close(self):
self.is_open = False

class Client:
"""Simulated S3Client to test calling clear_cache on."""

def __init__(self):
self.cleared = False

def clear_cache(self):
self.cleared = True

finder = self.distro_finder_cls("s3://hab-test-bucket")
# Simulate use of the finder
archive = Archive()
finder._archives["s3://hab-test-bucket/dist_a_v0.1.zip"] = archive
finder._cache["test"] = "case"
finder.client = Client()
assert archive.is_open
assert not finder.client.cleared

# Check that clearing reset the cache variables
finder.clear_cache()
assert finder._archives == {}
assert finder._cache == {}
# Check that any open archives were closed
assert not archive.is_open
# Check that the client was not cleared as persistent is False
assert not finder.client.cleared

# Clearing of persistent caches clears the cache
finder.clear_cache(persistent=True)
assert finder.client.cleared


# TODO: Break this into separate smaller tests of components for each class not this
@pytest.mark.parametrize(
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ deps =
pytest
json5: pyjson5
s3: .[s3]
s3: moto[s3]
s3: requests-mock
commands =
coverage run -m pytest {tty:--color=yes} {posargs:tests/}

Expand Down

0 comments on commit e0c3980

Please sign in to comment.