Skip to content

Commit

Permalink
chore: remove black in favor of ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
stdavis committed May 2, 2024
1 parent 37bcedf commit 1321667
Show file tree
Hide file tree
Showing 16 changed files with 156 additions and 140 deletions.
1 change: 0 additions & 1 deletion .vscode/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"recommendations": [
"editorconfig.editorconfig",
"njpwerner.autodocstring",
"ms-python.black-formatter",
"ms-python.vscode-pylance",
"ms-python.python",
"donjayamanne.python-environment-manager",
Expand Down
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true
},
"cSpell.words": [
Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
line-length = 120
[tool.ruff.lint]
ignore = ["E501"]
[tool.black]
line-length = 120
[tool.pytest.ini_options]
minversion = "6.0"
testpaths = ["tests", "src"]
Expand Down
5 changes: 3 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ A normalized string representing the entire address that was passed into the con
1. install required dependencies to work on sweeper
- `pip install -e ".[tests]"`
1. `test_metadata.py` uses a SQL database that needs to be restored via `src/sweeper/tests/data/Sweeper.bak` to your local SQL Server.
1. run tests: `pytest`
1. run linter: `ruff check .`
1. run sweeper: `python -m sweeper`
1. test: `pytest`
1. lint: `ruff check .`
1. format: `ruff format .`
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
setup.py
A module that installs sweeper as a module
"""

import glob
from os.path import basename, splitext
from pathlib import Path
Expand Down Expand Up @@ -59,7 +60,6 @@
"pytest-mock==3.*",
"pytest-watch==4.*",
"pytest==8.*",
"black==24.*",
"ruff==0.*",
],
},
Expand Down
1 change: 1 addition & 0 deletions src/sweeper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
sweeper sweep --workspace=c:\\data\\thing --try-fix --save-report=c:\\temp --backup-to=c:\\temp\\backup.gdb
sweeper sweep addresses --workspace=c:\\data\\thing --try-fix --save-report=c:\\temp --backup-to=c:\\temp\\backup.gdb --field-name=ADDRESS
"""

import datetime
import logging
import logging.handlers
Expand Down
130 changes: 71 additions & 59 deletions src/sweeper/address_parser.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python
# * coding: utf8 *
'''
"""
address_parser.py
A module that parses street addresses into their various parts.
'''
"""

import json
import pprint
import re
Expand All @@ -12,44 +13,45 @@
import usaddress

TAG_MAPPING = {
'AddressNumber': 'address_number',
'AddressNumberPrefix': 'address_number',
'AddressNumberSuffix': 'address_number_suffix',
'StreetNamePreDirectional': 'prefix_direction',
'StreetName': 'street_name',
"AddressNumber": "address_number",
"AddressNumberPrefix": "address_number",
"AddressNumberSuffix": "address_number_suffix",
"StreetNamePreDirectional": "prefix_direction",
"StreetName": "street_name",
# 'StreetNamePreModifier': 'street_name', #: handled in class below
# 'StreetNamePreType': 'street_name', #: handled in class below
'StreetNamePostDirectional': 'street_direction',
'StreetNamePostModifier': 'street_type',
'StreetNamePostType': 'street_type',
"StreetNamePostDirectional": "street_direction",
"StreetNamePostModifier": "street_type",
"StreetNamePostType": "street_type",
# 'CornerOf': 'address1',
# 'IntersectionSeparator': 'address1',
# 'LandmarkName': 'address1',
# 'USPSBoxGroupID': 'address1',
# 'USPSBoxGroupType': 'address1',
# 'USPSBoxID': 'address1',
# 'USPSBoxType': 'address1',
'BuildingName': 'unit_id',
'OccupancyType': 'unit_type',
'OccupancyIdentifier': 'unit_id',
'SubaddressIdentifier': 'unit_id',
'SubaddressType': 'unit_type',
'PlaceName': 'city',
'StateName': 'state',
'ZipCode': 'zip_code',
'USPSBoxID': 'po_box'
"BuildingName": "unit_id",
"OccupancyType": "unit_type",
"OccupancyIdentifier": "unit_id",
"SubaddressIdentifier": "unit_id",
"SubaddressType": "unit_type",
"PlaceName": "city",
"StateName": "state",
"ZipCode": "zip_code",
"USPSBoxID": "po_box",
}
TWO_CHAR_DIRECTIONS = ['NO', 'SO', 'EA', 'WE']
with open(join(dirname(realpath(__file__)), 'street_types.json'), 'r') as file:
TWO_CHAR_DIRECTIONS = ["NO", "SO", "EA", "WE"]
with open(join(dirname(realpath(__file__)), "street_types.json"), "r") as file:
STREET_TYPES = json.loads(file.read())
HWY_REGEX = re.compile('(SR|STATE ROUTE|HIGHWAY)')
UNIT_VALUES_NOT_APPROPRIATE_FOR_HASH_SIGN = ['rear']
HWY_REGEX = re.compile("(SR|STATE ROUTE|HIGHWAY)")
UNIT_VALUES_NOT_APPROPRIATE_FOR_HASH_SIGN = ["rear"]


class Address():
'''
class Address:
"""
Class for parsing address strings
'''
"""

address_number = None
address_number_suffix = None
prefix_direction = None
Expand All @@ -64,14 +66,14 @@ class Address():
state = None

def __init__(self, address_text):
parts, parsed_as = usaddress.tag(address_text.replace('.', ''), TAG_MAPPING)
if parsed_as not in ['Street Address', 'PO Box']:
parts, parsed_as = usaddress.tag(address_text.replace(".", ""), TAG_MAPPING)
if parsed_as not in ["Street Address", "PO Box"]:
raise Exception(f'"{address_text}" is not recognized as a valid street address, or P.O. Box')

for part in parts:
try:
value = parts[part].upper()
if part.endswith('direction'):
if part.endswith("direction"):
value = normalize_direction(value)

setattr(self, part, value)
Expand All @@ -83,85 +85,95 @@ def __init__(self, address_text):

try:
#: e.g. US HWY
self.street_name = f'{normalize_street_name_pre_type(self.StreetNamePreType)} {self.street_name}'
self.street_name = f"{normalize_street_name_pre_type(self.StreetNamePreType)} {self.street_name}"
del self.StreetNamePreType
except AttributeError:
pass

try:
self.street_name = f'{self.StreetNamePreModifier} {self.street_name}'
self.street_name = f"{self.StreetNamePreModifier} {self.street_name}"
del self.StreetNamePreModifier
except AttributeError:
pass

#: look for two-character prefix directions which usaddress does not handle
if self.street_name:
street_name_parts = self.street_name.split(' ')
street_name_parts = self.street_name.split(" ")
if len(street_name_parts) > 1:
if street_name_parts[0].upper() in TWO_CHAR_DIRECTIONS and self.prefix_direction is None:
self.prefix_direction = normalize_direction(street_name_parts[0])
self.street_name = ' '.join(street_name_parts[1:])
self.street_name = " ".join(street_name_parts[1:])
elif street_name_parts[-1].upper() in TWO_CHAR_DIRECTIONS and self.street_direction is None:
self.street_direction = normalize_direction(street_name_parts[-1])
self.street_name = ' '.join(street_name_parts[:-1])
self.street_name = " ".join(street_name_parts[:-1])

Check warning on line 108 in src/sweeper/address_parser.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/address_parser.py#L108

Added line #L108 was not covered by tests

if self.street_type is not None:
#: handle multiple street_types (assume only the last one is valid and move all others to the street name)
if len(self.street_type.split(' ')) > 1:
parsed_street_types = self.street_type.split(' ')
self.street_name += ' ' + ' '.join(parsed_street_types[:-1])
if len(self.street_type.split(" ")) > 1:
parsed_street_types = self.street_type.split(" ")
self.street_name += " " + " ".join(parsed_street_types[:-1])
self.street_type = parsed_street_types[-1]

try:
self.street_type = normalize_street_type(self.street_type)
except InvalidStreetTypeError:
#: must be part of the street name
self.street_name += f' {self.street_type}'
self.street_name += f" {self.street_type}"
self.street_type = None

if self.unit_id is not None:
#: add `#` if there is no unit type and the unit id is not a word (e.g. `rear`) that should not get one
if not self.unit_id.startswith('#') and self.unit_type is None and self.unit_id.lower() not in UNIT_VALUES_NOT_APPROPRIATE_FOR_HASH_SIGN:
self.unit_id = f'# {self.unit_id}'
if (
not self.unit_id.startswith("#")
and self.unit_type is None
and self.unit_id.lower() not in UNIT_VALUES_NOT_APPROPRIATE_FOR_HASH_SIGN
):
self.unit_id = f"# {self.unit_id}"

#: strip `#` if there is a unit type
elif self.unit_id.startswith('#') and self.unit_type is not None:
elif self.unit_id.startswith("#") and self.unit_type is not None:
self.unit_id = self.unit_id[1:].strip()

def __repr__(self):
properties = vars(self)
properties.update({'normalized': self.normalized})
properties.update({"normalized": self.normalized})

Check warning on line 139 in src/sweeper/address_parser.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/address_parser.py#L139

Added line #L139 was not covered by tests

return f'Parsed Address:\n{pprint.pformat(properties)}'
return f"Parsed Address:\n{pprint.pformat(properties)}"

Check warning on line 141 in src/sweeper/address_parser.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/address_parser.py#L141

Added line #L141 was not covered by tests

@property
def normalized(self):
'''
"""
getter for normalized address string
'''
"""
if self.po_box is not None:
return f'PO BOX {self.po_box}'
return f"PO BOX {self.po_box}"

parts = [
self.address_number, self.address_number_suffix, self.prefix_direction, self.street_name, self.street_type, self.street_direction, self.unit_type,
self.unit_id
self.address_number,
self.address_number_suffix,
self.prefix_direction,
self.street_name,
self.street_type,
self.street_direction,
self.unit_type,
self.unit_id,
]

return ' '.join([part for part in parts if part is not None])
return " ".join([part for part in parts if part is not None])


def normalize_direction(direction_text):
'''
"""
returns the single letter corresponding to the input direction
'''
"""

return direction_text[0].upper()


def normalize_street_type(type_text):
'''
"""
returns the standard abbreviation for the input street type
'''
"""

type_text = type_text.upper()
for abbreviation, values in STREET_TYPES.items():
Expand All @@ -172,18 +184,18 @@ def normalize_street_type(type_text):


def normalize_street_name_pre_type(text):
'''normalizes highways by doing things like replaces SR with HWY and removes US
"""normalizes highways by doing things like replaces SR with HWY and removes US
No need to worry about casing or "."s because usaddress has already taken care of them by this point.
'''
return HWY_REGEX.sub('HWY', text).replace('US ', '')
"""
return HWY_REGEX.sub("HWY", text).replace("US ", "")


class InvalidStreetTypeError(Exception):
'''
"""
exception for when the street type does not have a corresponding value in street_types.json
'''
"""

def __init__(self, type_text):
super().__init__()
self.message = f'No matching abbreviation found for {type_text}'
self.message = f"No matching abbreviation found for {type_text}"
1 change: 1 addition & 0 deletions src/sweeper/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
backup.py
A module that creates a gdb if it doesn't exist and inserts a feature class
"""

import logging

Check warning on line 8 in src/sweeper/backup.py

View check run for this annotation

Codecov / codecov/patch

src/sweeper/backup.py#L8

Added line #L8 was not covered by tests
import os
from datetime import datetime
Expand Down
Loading

0 comments on commit 1321667

Please sign in to comment.