diff --git a/README.md b/README.md index c685ab3..be3eb9c 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,96 @@ # drover -*drover: a command-line utility to deploy Python packages to Lambda functions* +*drover: a command-line utility to deploy Python packages to AWS Lambda functions* [![circleci](https://circleci.com/gh/jwilges/drover/tree/master.svg?style=shield)](https://circleci.com/gh/jwilges/drover/tree/master) [![codecov](https://codecov.io/gh/jwilges/drover/branch/master/graph/badge.svg)](https://codecov.io/gh/jwilges/drover/branch/master) +[![pypi release](https://img.shields.io/pypi/v/drover)](https://pypi.org/project/drover) +![pypi monthly downloads](https://img.shields.io/pypi/dm/drover) +![license](https://img.shields.io/github/license/jwilges/drover) + +## Background +This utility aims to provide a simple, repeatable, and efficient process for +deploying a Python package as a Lambda. + +To encourage separating infrequently changing Python dependencies in a separate +"requirements" layer, by default `drover` requires a list of regular expressions +to define which files to include in the Lambda function; all other files are +placed in a requirements layer that is then attached to the Lambda function. + +Next, `drover` generates and stores hashes for both the Lambda function and the +requirements layer. This allows `drover` to avoid redundantly updating the +Lambda function and/or requirements layer if no package contents have changed. + +As much as possible, `drover` avoids altering existing infrastructure. +Infrastructure utilities such as +[Terraform](https://github.com/hashicorp/terraform) may be used to create a +Lambda and manage its surrounding resources and `drover` may be used to update +the Lambda function as well as its layers. ## Supported Platforms -This utility has been tested on macOS Catalina 10.15. +This utility is continuously unit tested on a GNU/Linux system with Python 3.6, +3.7, and 3.8. 
## Usage +### Settings +The following `drover.yml` settings file demonstrates how to configure a +`staging` stage that may be used to deploy a Python package to a Lambda named +`basic-lambda` in the `us-east-1` region: + +```yaml +stages: + staging: + region_name: us-east-1 + function_name: basic-lambda + compatible_runtime: python3.8 + function_file_patterns: + - '^basic_lambda.*' + function_extra_paths: + - instance + upload_bucket: + region_name: us-east-1 + bucket_name: drover-examples +``` + +The `compatible_runtime` value will be used to define the compatible runtime for +both the requirements layer (if present) and the Lambda function. + +While processing files from the install path (see: `--install-path` below), any +files matching regular expressions defined in the `function_file_patterns` list +will be included in the function; any remaining files will be included in the +requirements layer. + +The `function_extra_paths` list may contain additional paths to include in the +function layer archive; non-absolute paths will be relative to the current +working directory. + +The `upload_bucket` map may provide an S3 bucket name and its associated region +for use when uploading Lambda function and layer archive files. + +### Command line interface +Assuming a Python package exists in the `basic_lambda` directory, the following +commands demonstrate a simple Lambda deploy with `drover`: + + pip install --target install basic_lambda + drover --install-path install staging + +Assuming the Lambda is not already up to date, `drover` will attempt to upload +the latest source and update the Lambda function: + + Requirements digest: None + Function digest: 0b37cf78f6ad4c137fb1f77751c0c0e759dd2d6c515937d33fae435b9e091f72 + Skipping requirements upload + Uploading function archive... + Failed to upload function archive to bucket; falling back to direct file upload. + Updating function resource... 
+ Updated function "basic-lambda" resource; size: 1.78 KiB; ARN: arn:aws:lambda:us-east-1:977874552542:function:basic-lambda + +### Additional examples +For more examples, see the [examples](examples/README.md) directory. + +## How to contribute +Contributions are welcome in the form of inquiries, issues, and pull requests. + ### Development Environment -Initialize a development environment by executing `nox -s dev-3.8`; the -`drover` utility will be installed in the `.nox/dev-3-8` Python virtual -environment binary path. \ No newline at end of file +Initialize a development environment by executing `nox -s dev-3.8`; the `drover` +utility will be installed in the `.nox/dev-3-8` Python virtual environment +binary path. \ No newline at end of file diff --git a/drover/__init__.py b/drover/__init__.py index 8688d6f..98e9dbf 100644 --- a/drover/__init__.py +++ b/drover/__init__.py @@ -16,7 +16,8 @@ import tqdm from pydantic import BaseModel -from drover.io import ArchiveMapping, format_file_size, get_digest, get_relative_file_names, write_archive +from drover.io import (ArchiveMapping, FunctionLayerMappings, + format_file_size, get_digest, get_relative_file_names, write_archive) from drover.models import S3BucketFileVersion, S3BucketPath, Settings, Stage __version__ = '0.7.1' @@ -42,21 +43,10 @@ def __init__(self, settings: Settings, stage: str, interactive: bool = False): raise SettingsError(f'Invalid stage name: {stage}') self.stage = self.settings.stages[stage] - - self.requirements_layer_name = f'{self.stage.function_name}-requirements' self.compatible_runtime_library_path = Drover._get_runtime_library_path(self.stage.compatible_runtime) - self.lambda_client = boto3.client('lambda', region_name=self.stage.region_name) - def update(self, install_path: Path) -> None: - """Publish and/or update a Lambda function and/or requirements layer representation of a Python package directory - - Args: - install_path: a Python package directory (e.g. 
via `pip install -t `)""" - - if not install_path.is_dir(): - raise UpdateError(f'Install path is invalid: {install_path}') - + def _get_function_layer_mappings(self, install_path: Path) -> FunctionLayerMappings: requirements_base_path = self.compatible_runtime_library_path function_file_patterns = self.stage.function_file_patterns @@ -98,6 +88,23 @@ def _log(header: str, mappings: Sequence[ArchiveMapping]): _logger.info('Requirements digest: %s', requirements_digest) _logger.info('Function digest: %s', function_digest) + return FunctionLayerMappings( + function_mappings=function_mappings, + function_digest=function_digest, + requirements_mappings=requirements_mappings, + requirements_digest=requirements_digest) + + def update(self, install_path: Path) -> None: + """Publish and/or update a Lambda function and/or requirements layer representation of a Python package directory + + Args: + install_path: a Python package directory (e.g. via `pip install -t `)""" + + if not install_path.is_dir(): + raise UpdateError(f'Install path is invalid: {install_path}') + + mappings = self._get_function_layer_mappings(install_path) + try: function_response = self.lambda_client.get_function(FunctionName=self.stage.function_name) except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e: @@ -106,9 +113,11 @@ def _log(header: str, mappings: Sequence[ArchiveMapping]): function_arn = function_response['Configuration']['FunctionArn'] function_layer_arns: List[str] = [layer['Arn'] for layer in function_response['Configuration'].get('Layers', [])] function_runtime = function_response['Configuration']['Runtime'] - function_tags: Mapping[str, str] = function_response['Tags'] or {} + function_tags: Mapping[str, str] = function_response.get('Tags', {}) head_requirements_digest = function_tags.get('HeadRequirementsDigest') head_requirements_layer_arn = function_tags.get('HeadRequirementsLayerArn') + head_function_layer_arns = [arn for arn in 
(*self.stage.supplemental_layer_arns, + head_requirements_layer_arn) if arn] head_function_digest = function_tags.get('HeadFunctionDigest') head_requirements_layer_arn_missing = True @@ -117,50 +126,55 @@ def _log(header: str, mappings: Sequence[ArchiveMapping]): self.lambda_client.get_layer_version_by_arn(Arn=head_requirements_layer_arn) head_requirements_layer_arn_missing = False except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e: - _logger.warning('Unable to retrieve requirements layer "%s"; forcing re-upload.', head_requirements_layer_arn) + _logger.warning('Unable to retrieve requirements layer "%s"; forcing re-upload.', + head_requirements_layer_arn) _logger.debug('', exc_info=e) - should_upload_requirements = any(( + should_upload_requirements = mappings.requirements_mappings and any(( not head_requirements_digest, not head_requirements_layer_arn, head_requirements_layer_arn_missing, - head_requirements_digest != requirements_digest)) + head_requirements_digest != mappings.requirements_digest)) if should_upload_requirements: - requirements_layer_arn = self._upload_requirements_archive(requirements_mappings, requirements_digest) - function_tags['HeadRequirementsDigest'] = requirements_digest + requirements_layer_arn = self._upload_requirements_archive(mappings.requirements_mappings, + mappings.requirements_digest) + function_tags['HeadRequirementsDigest'] = mappings.requirements_digest function_tags['HeadRequirementsLayerArn'] = requirements_layer_arn else: requirements_layer_arn = head_requirements_layer_arn + function_tags.pop('HeadRequirementsDigest', None) + function_tags.pop('HeadRequirementsLayerArn', None) _logger.info('Skipping requirements upload') - if function_runtime != self.stage.compatible_runtime or requirements_layer_arn not in function_layer_arns: + if function_runtime != self.stage.compatible_runtime or function_layer_arns != head_function_layer_arns: _logger.info('Updating function resource...') - 
function_layer_arns = [requirements_layer_arn] try: self.lambda_client.update_function_configuration( FunctionName=self.stage.function_name, Runtime=self.stage.compatible_runtime, - Layers=function_layer_arns) + Layers=head_function_layer_arns) except botocore.exceptions.BotoCoreError as e: raise UpdateError(f'Failed to update function "{self.stage.function_name}" runtime and layers: {e}') _logger.info('Updated function "%s" resource; runtime: "%s"; layers: %s', self.stage.function_name, self.stage.compatible_runtime, function_layer_arns) - if not head_function_digest or head_function_digest != function_digest: - self._upload_function_archive(function_mappings) - function_tags['HeadFunctionDigest'] = function_digest + if not head_function_digest or head_function_digest != mappings.function_digest: + self._upload_function_archive(mappings.function_mappings) + function_tags['HeadFunctionDigest'] = mappings.function_digest else: _logger.info('Skipping function upload') - try: - self.lambda_client.tag_resource(Resource=function_arn, Tags=function_tags) - except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e: - raise UpdateError(f'Unable to update tags for Lambda function "{self.stage.function_name}": {e}') + function_tags = {key: value for key, value in function_tags.items() if value} + if function_tags: + try: + self.lambda_client.tag_resource(Resource=function_arn, Tags=function_tags) + except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e: + raise UpdateError(f'Unable to update tags for Lambda function "{self.stage.function_name}": {e}') - def _upload_file_to_bucket(self, file_name: Path, file_size: float) -> S3BucketFileVersion: + def _upload_file_to_bucket(self, file_name: Path) -> S3BucketFileVersion: upload_bucket: S3BucketPath = self.stage.upload_bucket s3_client = boto3.client('s3', region_name=upload_bucket.region_name) - + file_size = float(file_name.stat().st_size) key = 
f'{upload_bucket.prefix}{file_name.name}' with tqdm.tqdm(total=file_size, unit='B', unit_divisor=1024, unit_scale=True, leave=True, disable=not self.interactive) as progress: @@ -199,19 +213,18 @@ def _upload() -> str: write_archive(archive_file_name, archive_mappings) finally: os.close(archive_handle) - archive_size = float(archive_file_name.stat().st_size) if self.stage.upload_bucket: _logger.info('Uploading requirements layer archive...') try: - bucket_file = self._upload_file_to_bucket(archive_file_name, archive_size) + bucket_file = self._upload_file_to_bucket(archive_file_name) file_arguments = { 'S3Bucket': bucket_file.bucket_name, 'S3Key': bucket_file.key, } if bucket_file.version_id: file_arguments['S3ObjectVersion'] = bucket_file.version_id - except botocore.exceptions.BotoCoreError as e: + except (botocore.exceptions.ClientError, boto3.exceptions.S3UploadFailedError) as e: _logger.error('Failed to upload requirements archive to bucket; falling back to direct file upload.') _logger.debug('', exc_info=e) bucket_file = None @@ -224,7 +237,7 @@ def _upload() -> str: _logger.info('Publishing requirements layer...') try: response = self.lambda_client.publish_layer_version( - LayerName=self.requirements_layer_name, + LayerName=self.stage.requirements_layer_name, Description=archive_description, Content=file_arguments, CompatibleRuntimes=[self.stage.compatible_runtime]) @@ -237,7 +250,7 @@ def _upload() -> str: layer_version_arn = response['LayerVersionArn'] layer_size_text = format_file_size(float(response['Content']['CodeSize'])) _logger.info('Published requirements layer "%s"; size: %s; ARN: %s', - self.requirements_layer_name, layer_size_text, layer_version_arn) + self.stage.requirements_layer_name, layer_size_text, layer_version_arn) return layer_version_arn @@ -255,19 +268,18 @@ def _upload() -> str: write_archive(archive_file_name, archive_mappings) finally: os.close(archive_handle) - archive_size = float(archive_file_name.stat().st_size) if 
self.stage.upload_bucket: _logger.info('Uploading function archive...') try: - bucket_file = self._upload_file_to_bucket(archive_file_name, archive_size) + bucket_file = self._upload_file_to_bucket(archive_file_name) file_arguments = { 'S3Bucket': bucket_file.bucket_name, 'S3Key': bucket_file.key, } if bucket_file.version_id: file_arguments['S3ObjectVersion'] = bucket_file.version_id - except botocore.exceptions.BotoCoreError as e: + except (botocore.exceptions.ClientError, boto3.exceptions.S3UploadFailedError) as e: _logger.error('Failed to upload function archive to bucket; falling back to direct file upload.') _logger.debug('', exc_info=e) bucket_file = None diff --git a/drover/cli.py b/drover/cli.py index 089f2c1..3cded23 100644 --- a/drover/cli.py +++ b/drover/cli.py @@ -52,7 +52,7 @@ def main(): if not arguments.quiet: logging.basicConfig(format='%(message)s', stream=sys.stdout) logging_level = max(1, logging.INFO - (10 * arguments.verbose)) - _logger.setLevel(logging_level) + logging.getLogger(__name__.split('.')[0]).setLevel(logging_level) interactive = True if arguments.interactive else False if arguments.non_interactive else sys.__stdin__.isatty() diff --git a/drover/io.py b/drover/io.py index a61dcc9..7811e2a 100644 --- a/drover/io.py +++ b/drover/io.py @@ -6,7 +6,7 @@ import re import os import zipfile -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Iterable, Pattern, Sequence @@ -18,6 +18,15 @@ class ArchiveMapping: archive_file_name: Path +@dataclass +class FunctionLayerMappings: + """A function and requirements layer mapping and digest container""" + function_mappings: Sequence[ArchiveMapping] = field(default=list) + function_digest: str = None + requirements_mappings: Sequence[ArchiveMapping] = field(default=list) + requirements_digest: str = None + + def format_file_size(size_in_bytes: float) -> str: """Return a string representation of the specified size as its largest 2^10 
representation @@ -51,6 +60,8 @@ def get_digest(source_file_names: Sequence[Path], block_size: int = 8192) -> str digest = hashlib.sha256() full = set(source_file_names) done = set() + if not full: + return None for source_file_name in sorted(full): if package_record_pattern.search(str(source_file_name)): package_parent_path = source_file_name.parent.parent diff --git a/drover/models.py b/drover/models.py index 161d12c..165c9b6 100644 --- a/drover/models.py +++ b/drover/models.py @@ -24,9 +24,16 @@ class Stage(BaseModel): compatible_runtime: str function_file_patterns: Sequence[Pattern] function_extra_paths: Sequence[Path] = [] + requirements_layer_name: Optional[str] + supplemental_layer_arns: Sequence[str] = [] package_exclude_patterns: Sequence[Pattern] = [re.compile(r'.*__pycache__.*')] upload_bucket: Optional[S3BucketPath] + def __init__(self, **kwargs): + super().__init__(**kwargs) + if not self.requirements_layer_name: + self.requirements_layer_name = f'{self.function_name}-requirements' + class Settings(BaseModel): stages: Mapping[str, Stage] diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..7bf230d --- /dev/null +++ b/examples/README.md @@ -0,0 +1,3 @@ +# Examples + +- [basic_lambda](basic_lambda/README.md): A basic Lambda that returns its version; this Lambda intentionally has no external dependencies. \ No newline at end of file diff --git a/examples/basic_lambda/README.md b/examples/basic_lambda/README.md new file mode 100644 index 0000000..77a4d07 --- /dev/null +++ b/examples/basic_lambda/README.md @@ -0,0 +1,58 @@ +# basic_lambda +A basic Lambda that returns its version; this Lambda intentionally has no external dependencies. + +## Prerequisites +Before running this example, ensure: +- to install the `drover` and `invoke` Python packages (e.g. 
via `pip`), +- `~/.aws/credentials` contains a feasible profile named `examples`, +- a Lambda named `basic-lambda` exists in `us-east-1`, and +- the `basic-lambda` handler is set to `basic_lambda.lambda_handler`. + +## Sample deploy +Assuming the AWS profile `examples` has sufficient privileges, the command: + +`AWS_PROFILE=examples invoke deploy` + +should deploy the `basic_lambda` example: + +``` ++ python -m venv --clear /tmp/venv ++ /tmp/venv/bin/pip install wheel +Collecting wheel + Downloading https://files.pythonhosted.org/packages/8c/23/848298cccf8e40f5bbb59009b32848a4c38f4e7f3364297ab3c3e2e2cd14/wheel-0.34.2-py2.py3-none-any.whl +Installing collected packages: wheel +Successfully installed wheel-0.34.2 ++ /tmp/venv/bin/pip install --target /build -r requirements.txt . +Processing /var/task +Building wheels for collected packages: basic-lambda + Building wheel for basic-lambda (setup.py): started + Building wheel for basic-lambda (setup.py): finished with status 'done' + Created wheel for basic-lambda: filename=basic_lambda-0.0.1-cp38-none-any.whl size=1596 sha256=ed817c51edb7bb61a21bb96bd0f81674248af6815ae4728512668d6bfad0bc87 + Stored in directory: /tmp/pip-ephem-wheel-cache-12e6237i/wheels/87/fe/86/32f1e59dd6105e277388fa8d76ea702b40d0d4e6f71aad7438 +Successfully built basic-lambda +Installing collected packages: basic-lambda +Successfully installed basic-lambda-0.0.1 +Requirements digest: None +Function digest: 0b37cf78f6ad4c137fb1f77751c0c0e759dd2d6c515937d33fae435b9e091f72 +Skipping requirements upload +Uploading function archive... +Failed to upload function archive to bucket; falling back to direct file upload. +Updating function resource... 
+Updated function "basic-lambda" resource; size: 1.78 KiB; ARN: arn:aws:lambda:us-east-1:977874552542:function:basic-lambda +``` + +## Sample synchronous request +Assuming the AWS profile `examples` has sufficient privileges, the command: + +`AWS_PROFILE=examples invoke request` + +should synchronously invoke the deployed Lambda and return its response: + +``` +{ + "StatusCode": 200, + "ExecutedVersion": "$LATEST" +} +Lambda Output: +b'"0.0.1"' +``` \ No newline at end of file diff --git a/examples/basic_lambda/basic_lambda/__init__.py b/examples/basic_lambda/basic_lambda/__init__.py new file mode 100644 index 0000000..f834bf0 --- /dev/null +++ b/examples/basic_lambda/basic_lambda/__init__.py @@ -0,0 +1,5 @@ +"""A basic Lambda that returns its version""" +__version__ = '0.0.1' + +def lambda_handler(_event, _context): + return __version__ diff --git a/examples/basic_lambda/drover.yml b/examples/basic_lambda/drover.yml new file mode 100644 index 0000000..d80450a --- /dev/null +++ b/examples/basic_lambda/drover.yml @@ -0,0 +1,12 @@ +stages: + production: + region_name: us-east-1 + function_name: basic-lambda + compatible_runtime: python3.8 + function_file_patterns: + - '^basic_lambda.*' + function_extra_paths: + - instance + upload_bucket: + region_name: us-east-1 + bucket_name: drover-examples \ No newline at end of file diff --git a/examples/basic_lambda/requirements.txt b/examples/basic_lambda/requirements.txt new file mode 100644 index 0000000..1a4f201 --- /dev/null +++ b/examples/basic_lambda/requirements.txt @@ -0,0 +1,6 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile +# diff --git a/examples/basic_lambda/setup.py b/examples/basic_lambda/setup.py new file mode 100644 index 0000000..abc62f4 --- /dev/null +++ b/examples/basic_lambda/setup.py @@ -0,0 +1,49 @@ +import ast +import codecs +import os.path + +import setuptools + + +def read(*path_parts, iterate_lines=False): + source_file_name = os.path.join(*path_parts) + if not 
os.path.isfile(source_file_name): + raise FileNotFoundError(source_file_name) + with codecs.open(source_file_name, 'r') as source_file: + return source_file.readlines() if iterate_lines else source_file.read() + + +def get_version(*path_parts) -> str: + # See: + for line in read(*path_parts, iterate_lines=True): + if line.startswith('__version__'): + return ast.parse(line).body[0].value.s + raise RuntimeError('Unable to determine version.') + + +HERE = os.path.abspath(os.path.dirname(__file__)) +VERSION = get_version(HERE, 'basic_lambda', '__init__.py') +LONG_DESCRIPTION = read(HERE, 'README.md') + + +setuptools.setup( + name='basic_lambda', + version=VERSION, + author='Jeffrey Wilges', + author_email='jeffrey@wilges.com', + description='a basic Lambda that returns its version', + long_description=LONG_DESCRIPTION, + long_description_content_type='text/markdown', + url='https://github.com/jwilges/drover', + license='BSD', + packages=setuptools.find_packages(exclude=['tests*']), + python_requires='>=3.8', + install_requires=[], + classifiers=[ + 'Development Status :: 4 - Beta', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3.8', + 'Topic :: System :: Distributed Computing', + 'Topic :: System :: Networking', + ], +) diff --git a/examples/basic_lambda/tasks.py b/examples/basic_lambda/tasks.py new file mode 100644 index 0000000..985dba1 --- /dev/null +++ b/examples/basic_lambda/tasks.py @@ -0,0 +1,34 @@ +import tempfile +from pathlib import Path + +from invoke import task + +_SOURCE_PATH = Path(__file__).parent.absolute() + + +@task +def deploy(context): + with tempfile.TemporaryDirectory(prefix='.deploy-', dir=_SOURCE_PATH) as deploy_path: + deploy_path = Path(deploy_path).absolute() + context.run( + f'docker run -v {_SOURCE_PATH}:/var/task -v {deploy_path}:/build ' + 'lambci/lambda:build-python3.8 ' + 'sh -c "set -x && ' + 'python -m venv --clear /tmp/venv && ' + '/tmp/venv/bin/pip install wheel && ' + '/tmp/venv/bin/pip install 
--target /build -r requirements.txt ."') + + # The `drover` utility configuration may contain files relative to the source working directory. + with context.cd(str(_SOURCE_PATH)): + context.run(f'drover --install-path {deploy_path} production') + + +@task +def request(context): + with tempfile.NamedTemporaryFile(prefix='output', dir=_SOURCE_PATH) as output_file: + context.run( + 'aws --region us-east-1 lambda invoke ' + '--function-name "basic-lambda" --invocation-type "RequestResponse" ' + f'{output_file.name}') + output_file.seek(0) + print('Lambda Output:', output_file.read(), sep='\n') diff --git a/tests/test_drover.py b/tests/test_drover.py index c45d900..d9b7a44 100644 --- a/tests/test_drover.py +++ b/tests/test_drover.py @@ -61,7 +61,7 @@ def test_init_with_valid_settings_and_stage(self): assert drover.settings == expected_settings assert drover.interactive == expected_interactive assert drover.stage == expected_stage - assert drover.requirements_layer_name == expected_requirements_layer_name + assert drover.stage.requirements_layer_name == expected_requirements_layer_name assert drover.compatible_runtime_library_path == expected_compatible_runtime_library_path assert drover.lambda_client == mock_boto3_client