diff --git a/README.md b/README.md index 4285dfa..fa5bd0e 100644 --- a/README.md +++ b/README.md @@ -27,9 +27,9 @@ that defines your fingerprint targets. targets: target-name: include-paths: - - src/**.py # Glob to match all python files recursively under a directory - - src/ # Will match every file under src/, recursively. (Same as 'src/**') - - src # interchangeable with `src/` or `src/**` + - src/**/*.py # Glob to match all python files recursively under a directory + - src/ # Will match every file under src/, recursively. (Same as 'src/**/*.*') + - src # interchangeable with `src/` or `src/**/*.*` - src/foo.py # Include a specific file ``` @@ -50,7 +50,7 @@ targets: - fingerprints.yaml source: depends-on: [dependencies] - include-paths: ['src/**.py'] + include-paths: ['src/**/*.py'] ``` **All paths will be lexicographically sorted at runtime**, however dependencies @@ -60,16 +60,15 @@ are always resolved in the order provided. ### Excluding Files -There may be some paths that you never want to consider. For instance `__pycache__` is -always excluded by default, no matter where it falls. +There may be some paths that you never want to consider. +`.pyc`, `__pycache__` and `.pytest_cache/` are always ignored by default. You can exclude paths at the base of your yaml: ```yaml ignore-paths: - - __pycache__ # Never necessary, this path is always ignored - - .secrets # Entire directory will always be ignored wherever it is in the tree - - secret.py # Will be ignored in every directory it exists in. 
+ - '**/ignore-me.py' # Ignore every 'ignore-me.py' in the tree + - 'src/special/ignore-me-also.py' # Ignores this specific file targets: foo: @@ -77,5 +76,4 @@ targets: include-paths: ['src'] ``` - [common-build-scripts]: https://github.com/uwit-iam/common-build-scripts diff --git a/fingerprinter/fingerprinter.py b/fingerprinter/fingerprinter.py index 3eb5461..94e1481 100644 --- a/fingerprinter/fingerprinter.py +++ b/fingerprinter/fingerprinter.py @@ -12,11 +12,16 @@ from .models import FingerprintConfig + class Fingerprinter: def __init__(self, config: FingerprintConfig): self.config = config self.path_cache = {} - self.ignored_paths = {'__pycache__'} + self.ignored_paths = { + '**/*.pyc', + '**/__pycache__/**', + '**/.pytest_cache/**' + } self.ignored_paths.update(self.config.ignore_paths) self.included_paths = set() @@ -25,8 +30,10 @@ def resolve_path(self, path: str) -> List[str]: if os.path.isfile(path): self.path_cache[path] = [path] elif os.path.isdir(path): - path = os.path.join(path, '*') - self.path_cache[path] = sorted(glob.glob(path)) + glob_ = os.path.join(path, '**', '*.*') + logging.debug(f"Auto-expanding path {path} to glob: {glob_}") + path = glob_ + self.path_cache[path] = sorted(glob.glob(path, recursive=True)) return self.path_cache.get(path, []) @staticmethod @@ -56,18 +63,21 @@ def path_is_ignored(self, filename: str) -> bool: if filename in self.included_paths: return False + paths_to_ignore = set() + if filename not in self.ignored_paths: for p in self.ignored_paths: - if ( # /foo/bar/baz.py will be ignore if 'foo/*' is ignored - ('*' in p and filename in glob.glob(p)) + ('*' in p and filename in glob.glob(p, recursive=True)) # /foo/bar/baz.py will be ignored if 'baz.py' is ignored or os.path.basename(filename) == p # /foo/bar/baz.py will be ignored if '/foo/bar' is ignored or os.path.dirname(filename) == p ): - self.ignored_paths.add(filename) + paths_to_ignore.add(filename) + + self.ignored_paths.update(paths_to_ignore) if filename in 
self.ignored_paths: return True @@ -77,12 +87,19 @@ def path_is_ignored(self, filename: str) -> bool: def get_path_fingerprint(self, path: str) -> bytes: h = hashlib.sha256() - for fn in sorted(self.resolve_path(path)): - if os.path.isdir(fn): - h.update(self.get_path_fingerprint(fn)) - elif os.path.isfile(fn): - logging.debug(f"Getting fingerprint for file: {fn}") - h.update(self.get_file_sha256sum(fn)) + resolved_paths = sorted(self.resolve_path(path)) + if resolved_paths: + for fn in resolved_paths: + if self.path_is_ignored(fn): + logging.debug(f'Ignoring path "{fn}"') + continue + if os.path.isdir(fn): + h.update(self.get_path_fingerprint(fn)) + elif os.path.isfile(fn): + logging.debug(f"Getting fingerprint for file: {fn}") + h.update(self.get_file_sha256sum(fn)) + else: + logging.warning(f'No files matched path "{path}"') return h.hexdigest().encode('UTF-8') def get_fingerprint_bytes(self, target: str) -> bytes: @@ -97,6 +114,7 @@ def get_fingerprint(self, target: str) -> str: h.update(self.get_fingerprint_bytes(dep)) for path in sorted(target.include_paths): + logging.debug(f'Resolving files for path "{path}"') h.update(self.get_path_fingerprint(path)) return h.hexdigest() diff --git a/fingerprinter/models.py b/fingerprinter/models.py index f052cf0..8262884 100644 --- a/fingerprinter/models.py +++ b/fingerprinter/models.py @@ -4,6 +4,9 @@ class FingerprintTarget(BaseModel): + class Config: + allow_population_by_field_name = True + depends_on: List[str] = Field(default_factory=lambda: [], alias='depends-on') # All directory paths are recursive. 
@@ -12,5 +15,7 @@ class FingerprintTarget(BaseModel): class FingerprintConfig(BaseModel): - ignore_paths: List[str] = Field(default_factory=lambda: ['__pycache__'], alias='ignore-paths') + class Config: + allow_population_by_field_name = True + ignore_paths: List[str] = Field(default_factory=lambda: [], alias='ignore-paths') targets: Dict[str, FingerprintTarget] diff --git a/fingerprints.yaml b/fingerprints.yaml index 4e247d7..1d69bd3 100644 --- a/fingerprints.yaml +++ b/fingerprints.yaml @@ -1,17 +1,16 @@ - ```yaml - # This example has a source fingerprint that is generated for all python files - # under the src/ directory, but the fingerprint is dependent on the - # dependency locks. This means that even if all python files remain - # untouched, an update to the dependencies will generate a new - # source fingerprint. - # `fingerprints.yaml` is also included here to ensure that changes - # to the actual fingerprint configuration regenerates all fingerprints. - targets: - dependencies: - include-paths: - - poetry.lock - - fingerprints.yaml - source: - depends-on: [dependencies] - include-paths: ['fingerprinter/**.py'] - ``` +# This example has a source fingerprint that is generated for all python files +# under the fingerprinter/ directory, but the fingerprint is dependent on the +# dependency locks. This means that even if all python files remain +# untouched, an update to the dependencies will generate a new +# source fingerprint. +# `fingerprints.yaml` is also included here to ensure that changes +# to the actual fingerprint configuration regenerate all fingerprints. 
+targets: + dependencies: + include-paths: + - poetry.lock + - fingerprints.yaml + source: + depends-on: [dependencies] + include-paths: + - fingerprinter diff --git a/poetry.lock b/poetry.lock index 3f34752..9869ad0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -12,6 +12,7 @@ mypy-extensions = ">=0.4.3" pathspec = ">=0.9.0" platformdirs = ">=2" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -225,8 +226,8 @@ testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", [metadata] lock-version = "1.1" -python-versions = "^3.8,^3.9,^3.10" -content-hash = "ef8db48f1b589d687a107aa723caf15711b2d0c89c5a7d9d97a050e11f8f39c7" +python-versions = ">=3.8" +content-hash = "190e4da022ac1b11875ca651609a3948c18c3154900eb73556f71dd037b16947" [metadata.files] black = [