Skip to content

Commit

Permalink
Fix several bugs in path resolution.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom Thorogood committed Mar 30, 2022
1 parent 31231dc commit 6ee0414
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 42 deletions.
18 changes: 8 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ that defines your fingerprint targets.
targets:
target-name:
include-paths:
- src/**.py # Glob to match all python files recursively under a directory
- src/ # Will match every file under src/, recursively. (Same as 'src/**')
- src # interchangeable with `src/` or `src/**`
- src/**/*.py # Glob to match all python files recursively under a directory
- src/ # Will match every file under src/, recursively. (Same as 'src/**/*.*')
- src # interchangeable with `src/` or `src/**/*.*`
- src/foo.py # Include a specific file
```
Expand All @@ -50,7 +50,7 @@ targets:
- fingerprints.yaml
source:
depends-on: [dependencies]
include-paths: ['src/**.py']
include-paths: ['src/**/*.py']
```
**All paths will be lexicographically sorted at runtime**, however dependencies
Expand All @@ -60,22 +60,20 @@ are always resolved in the order provided.

### Excluding Files

There may be some paths that you never want to consider. For instance `__pycache__` is
always excluded by default, no matter where it falls.
There may be some paths that you never want to consider.
`.pyc`, `__pycache__` and `.pytest_cache/` are always ignored by default.

You can exclude paths at the base of your yaml:

```yaml
ignore-paths:
- __pycache__ # Never necessary, this path is always ignored
- .secrets # Entire directory will always be ignored wherever it is in the tree
- secret.py # Will be ignored in every directory it exists in.
- '**/ignore-me.py' # Ignore every 'ignore-me.py' in the tree
- 'src/special/ignore-me-also.py' # Ignores this specific file
targets:
foo:
# Will include src/foo/bar, but not src/.secrets/sekret or src/foo/__pycache__/blah
include-paths: ['src']
```


[common-build-scripts]: https://github.com/uwit-iam/common-build-scripts
42 changes: 30 additions & 12 deletions fingerprinter/fingerprinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@

from .models import FingerprintConfig


class Fingerprinter:
def __init__(self, config: FingerprintConfig):
self.config = config
self.path_cache = {}
self.ignored_paths = {'__pycache__'}
self.ignored_paths = {
'**/*.pyc',
'**/__pycache__/**',
'**/.pytest_cache/**'
}
self.ignored_paths.update(self.config.ignore_paths)
self.included_paths = set()

Expand All @@ -25,8 +30,10 @@ def resolve_path(self, path: str) -> List[str]:
if os.path.isfile(path):
self.path_cache[path] = [path]
elif os.path.isdir(path):
path = os.path.join(path, '*')
self.path_cache[path] = sorted(glob.glob(path))
glob_ = os.path.join(path, '**', '*.*')
logging.debug(f"Auto-expanding path {path} to glob: {glob_}")
path = glob_
self.path_cache[path] = sorted(glob.glob(path, recursive=True))
return self.path_cache.get(path, [])

@staticmethod
Expand Down Expand Up @@ -56,18 +63,21 @@ def path_is_ignored(self, filename: str) -> bool:
if filename in self.included_paths:
return False

paths_to_ignore = set()

if filename not in self.ignored_paths:
for p in self.ignored_paths:

if (
# /foo/bar/baz.py will be ignored if 'foo/*' is ignored
('*' in p and filename in glob.glob(p))
('*' in p and filename in glob.glob(p, recursive=True))
# /foo/bar/baz.py will be ignored if 'baz.py' is ignored
or os.path.basename(filename) == p
# /foo/bar/baz.py will be ignored if '/foo/bar' is ignored
or os.path.dirname(filename) == p
):
self.ignored_paths.add(filename)
paths_to_ignore.add(filename)

self.ignored_paths.update(paths_to_ignore)

if filename in self.ignored_paths:
return True
Expand All @@ -77,12 +87,19 @@ def path_is_ignored(self, filename: str) -> bool:

def get_path_fingerprint(self, path: str) -> bytes:
h = hashlib.sha256()
for fn in sorted(self.resolve_path(path)):
if os.path.isdir(fn):
h.update(self.get_path_fingerprint(fn))
elif os.path.isfile(fn):
logging.debug(f"Getting fingerprint for file: {fn}")
h.update(self.get_file_sha256sum(fn))
resolved_paths = sorted(self.resolve_path(path))
if resolved_paths:
for fn in resolved_paths:
if self.path_is_ignored(fn):
logging.debug(f'Ignoring path "{fn}"')
continue
if os.path.isdir(fn):
h.update(self.get_path_fingerprint(fn))
elif os.path.isfile(fn):
logging.debug(f"Getting fingerprint for file: {fn}")
h.update(self.get_file_sha256sum(fn))
else:
logging.warning(f'No files matched path "{path}"')
return h.hexdigest().encode('UTF-8')

def get_fingerprint_bytes(self, target: str) -> bytes:
Expand All @@ -97,6 +114,7 @@ def get_fingerprint(self, target: str) -> str:
h.update(self.get_fingerprint_bytes(dep))

for path in sorted(target.include_paths):
logging.debug(f'Resolving files for path "{path}"')
h.update(self.get_path_fingerprint(path))

return h.hexdigest()
7 changes: 6 additions & 1 deletion fingerprinter/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@


class FingerprintTarget(BaseModel):
class Config:
allow_population_by_field_name = True

depends_on: List[str] = Field(default_factory=lambda: [], alias='depends-on')

# All directory paths are recursive.
Expand All @@ -12,5 +15,7 @@ class FingerprintTarget(BaseModel):


class FingerprintConfig(BaseModel):
ignore_paths: List[str] = Field(default_factory=lambda: ['__pycache__'], alias='ignore-paths')
class Config:
allow_population_by_field_name = True
ignore_paths: List[str] = Field(default_factory=lambda: [], alias='ignore-paths')
targets: Dict[str, FingerprintTarget]
33 changes: 16 additions & 17 deletions fingerprints.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
```yaml
# This example has a source fingerprint that is generated for all python files
# under the src/ directory, but the fingerprint is dependent on the
# dependency locks. This means that even if all python files remain
# untouched, an update to the dependencies will generate a new
# source fingerprint.
# `fingerprints.yaml` is also included here to ensure that changes
# to the actual fingerprint configuration regenerates all fingerprints.
targets:
dependencies:
include-paths:
- poetry.lock
- fingerprints.yaml
source:
depends-on: [dependencies]
include-paths: ['fingerprinter/**.py']
```
# This example has a source fingerprint that is generated for all files
# under the fingerprinter/ directory, but the fingerprint is dependent on the
# dependency locks. This means that even if all python files remain
# untouched, an update to the dependencies will generate a new
# source fingerprint.
# `fingerprints.yaml` is also included here to ensure that changes
# to the actual fingerprint configuration regenerate all fingerprints.
targets:
dependencies:
include-paths:
- poetry.lock
- fingerprints.yaml
source:
depends-on: [dependencies]
include-paths:
- fingerprinter
5 changes: 3 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6ee0414

Please sign in to comment.