Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement EXCLUSIONS config value #120

Merged
merged 10 commits into from
May 7, 2024
20 changes: 20 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
// VS Code debug configurations for this repository.
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
// Launches the `sweeper` module under debugpy with the arguments
// `sweep --workspace C:\temp\locators.gdb`, using the workspace folder as the working directory.
"name": "Python Debugger: Module",
"type": "debugpy",
"request": "launch",
"module": "sweeper",
"args": [
"sweep",
"--workspace",
// NOTE(review): this looks like a machine-specific path; presumably each developer points it at a local workspace.
"C:\\temp\\locators.gdb"
],
"cwd": "${workspaceFolder}"
}
]
}
5 changes: 4 additions & 1 deletion config.sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@
"TO_ADDRESSES": ["[email protected]"],
"CONNECTIONS_FOLDER": "path-to-folder",
"CHANGE_DETECTION_CONNECTION": "something.sde",
"CHANGE_DETECTION_TABLE": "SGID.META.ChangeDetection"
"CHANGE_DETECTION_TABLE": "SGID.META.ChangeDetection",
"EXCLUSIONS": {
"duplicates": []
}
}
4 changes: 4 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ A normalized string representing the entire address that was passed into the con
> - `--change-detect` argument
> - using user-specific connection files via the `CONNECTIONS_FOLDER` config value

## Exclusions

Tables can be skipped by adding patterns to the `EXCLUSIONS.<sweeper_key>` config array, where `<sweeper_key>` is the key of the sweeper that the exclusions apply to (e.g. `duplicates`). Each pattern is matched case-insensitively against table names using [fnmatch](https://docs.python.org/3/library/fnmatch.html#fnmatch.fnmatch)-style wildcards. Note that exclusions do not apply when using the `--table-name` argument.

## Development

1. clone arcgis conda environment
Expand Down
13 changes: 12 additions & 1 deletion src/sweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from supervisor.message_handlers import SendGridHandler
from supervisor.models import MessageDetails, Supervisor

from . import backup, config, report, workspace_info
from . import backup, config, report, utilities, workspace_info

Check warning on line 37 in src/sweeper/__main__.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/__main__.py#L37

Added line #L37 was not covered by tests
from .sweepers.addresses import AddressTest
from .sweepers.duplicates import DuplicateTest
from .sweepers.empties import EmptyTest
Expand Down Expand Up @@ -129,6 +129,7 @@

log.info(f"running {len(closet)} sweepers. try fix: {try_fix}")
for tool in closet:
log.info(f"running sweeper: {tool.key}")

Check warning on line 132 in src/sweeper/__main__.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/__main__.py#L132

Added line #L132 was not covered by tests
if tool.table_name:
run_tool(tool)

Expand All @@ -145,6 +146,16 @@
if any("SGID." in fc for fc in feature_class_names):
feature_class_names = [fc.split("SGID.", 1)[1] for fc in feature_class_names if "SGID." in fc]

#: apply exclusions
if config.has_config():
try:
exclusions_config = config.get_config("EXCLUSIONS")
except KeyError:
exclusions_config = {}

Check warning on line 154 in src/sweeper/__main__.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/__main__.py#L151-L154

Added lines #L151 - L154 were not covered by tests

exclusions = exclusions_config.get(tool.key, [])
feature_class_names = utilities.apply_exclusions(feature_class_names, exclusions)

Check warning on line 157 in src/sweeper/__main__.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/__main__.py#L156-L157

Added lines #L156 - L157 were not covered by tests

log.info(f"feature_class_names is: {feature_class_names}")

if using_change_detection and feature_class_names is None:
Expand Down
12 changes: 12 additions & 0 deletions src/sweeper/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
try:
with open("config.json") as f:
config = json.load(f)
log.info("config.json loaded successfully")
except FileNotFoundError:
log.debug("A config.json not found in current working directory. This will prevent some features from working.")

Expand All @@ -20,5 +21,16 @@
)


def has_config():
    """
    Report whether the module-level ``config`` value has been populated.

    Returns:
        bool: False while ``config`` is None, True otherwise.
    """
    if config is None:
        return False

    return True

Check warning on line 32 in src/sweeper/config.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/config.py#L32

Added line #L32 was not covered by tests


# set this to the current working directory
LOG_FILE_PATH = Path(Path.cwd(), "sweeper.log")
4 changes: 4 additions & 0 deletions src/sweeper/sweepers/addresses.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@
"""

import arcpy


from ..address_parser import Address


class AddressTest:
"""A class that validates address data"""

key = "addresses"

def __init__(self, workspace, table_name, field_name):
self.workspace = workspace
self.table_name = table_name
Expand Down
2 changes: 2 additions & 0 deletions src/sweeper/sweepers/duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
class DuplicateTest(SweeperBase):
"""A class that finds and removes duplicate geometries or attributes or both"""

key = "duplicates"

Check warning on line 17 in src/sweeper/sweepers/duplicates.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/sweepers/duplicates.py#L17

Added line #L17 was not covered by tests

def __init__(self, workspace, table_name):
self.workspace = workspace
self.table_name = table_name
Expand Down
2 changes: 2 additions & 0 deletions src/sweeper/sweepers/empties.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
class EmptyTest(SweeperBase):
"""A class to find empty geometries"""

key = "empties"

Check warning on line 17 in src/sweeper/sweepers/empties.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/sweepers/empties.py#L17

Added line #L17 was not covered by tests

def __init__(self, workspace, table_name):
self.workspace = workspace
self.table_name = table_name
Expand Down
2 changes: 2 additions & 0 deletions src/sweeper/sweepers/invalids.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
class InvalidSweeper:
"""A class that identifies invalid geometry objects and returns a report dictionary"""

key = "invalids"

Check warning on line 8 in src/sweeper/sweepers/invalids.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/sweepers/invalids.py#L8

Added line #L8 was not covered by tests

def __init__(self, workspace, table_name):
self.report = {}
self.workspace = workspace
Expand Down
2 changes: 2 additions & 0 deletions src/sweeper/sweepers/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ def get_description_text_only(html):
class MetadataTest(SweeperBase):
"""A class that validates geodatabase metadata"""

key = "metadata"

def __init__(self, workspace, table_name):
self.workspace = workspace
self.table_name = table_name
Expand Down
27 changes: 27 additions & 0 deletions src/sweeper/utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging
from fnmatch import fnmatchcase

log = logging.getLogger("sweeper")


def apply_exclusions(list, exclusions):
    """
    Remove every item that matches at least one exclusion pattern.

    Matching is case-insensitive: the item and the pattern are both casefolded and then
    compared with ``fnmatch.fnmatchcase``, so shell-style wildcards (``*``, ``?``, ``[seq]``)
    are supported and the result does not depend on the operating system.

    Args:
        list (list): The strings to filter. The name shadows the builtin but is kept so that
            existing keyword callers continue to work. The input is never mutated.
        exclusions (list): The fnmatch-compatible exclusion patterns. ``None`` or an empty
            list excludes nothing.

    Returns:
        list: A new list holding the items that matched no pattern, in their original order.
    """
    #: always work on a copy so the caller never gets back an alias of its own list
    remaining = [*list]

    for exclusion in exclusions or ():
        #: casefold the pattern once per exclusion rather than once per item
        pattern = exclusion.casefold()
        kept = []
        for item in remaining:
            if fnmatchcase(item.casefold(), pattern):
                log.info(f"Excluding {item} based on exclusion {exclusion}")
            else:
                kept.append(item)
        remaining = kept

    return remaining
47 changes: 47 additions & 0 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python
# * coding: utf8 *
"""
test_utilities.py
tests for the utilities module
"""

from sweeper import utilities


class TestApplyExclusions:
    """Behavioral tests for ``utilities.apply_exclusions``."""

    def test_simple_exclusions(self):
        #: literal patterns drop exact matches; a pattern that matches nothing is harmless
        candidates = ["test1", "test2", "test4", "test5"]
        patterns = ["test1", "test2", "test3"]

        assert utilities.apply_exclusions(candidates, patterns) == ["test4", "test5"]

    def test_glob_exclusions(self):
        #: a single wildcard pattern can remove several items at once
        candidates = ["test1", "test2", "test4", "test5", "hello"]

        remaining = utilities.apply_exclusions(candidates, ["test*"])

        assert remaining == ["hello"]

    def test_no_exclusions(self):
        #: an empty pattern list leaves the contents unchanged
        candidates = ["test1", "test2", "test4", "test5", "hello"]

        remaining = utilities.apply_exclusions(candidates, [])

        assert remaining == candidates

    def test_case_insensitive(self):
        #: pattern casing must not matter
        candidates = ["test1", "test2", "test4", "test5", "hello"]

        remaining = utilities.apply_exclusions(candidates, ["TEST*"])

        assert remaining == ["hello"]