Skip to content

Commit

Permalink
Optimization and fine-tuning (#101)
Browse files Browse the repository at this point in the history
* Move all gunicorn config into gunicorn.conf.py and update configuration to, like, work.

* Remove unused, empty directory.

* Blacken gunicorn.conf.py

* Optimize search output import and translation

- Make `_GLOBAL_CONSTRAINTS` a constant instead of generating on each request
- Don't import the raw data to the `ListPersonsOutput` model until AFTER it has been filtered
- Simplify RecordConstraint logic by only operating on Dicts, instead of Generic Types (which also speeds things up quite a lot)
- Stop duplicate filtering in translator.py that is already handled by PWS.
- Update tests to work with new logic.

* Fix minor typo in pws model documentation

* [Bot] Update version to 2.0.2

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Thomas Thorogood and github-actions[bot] authored Sep 9, 2021
1 parent b9c212c commit d8129bf
Show file tree
Hide file tree
Showing 16 changed files with 182 additions and 164 deletions.
3 changes: 0 additions & 3 deletions docker/development-server.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,5 @@ ENV FLASK_PORT=8000 \

# 0.0.0.0 binding is necessary for the EXPOSE above to have any effect.
CMD poetry run gunicorn -b 0.0.0.0:${FLASK_PORT} \
--log-level ${GUNICORN_LOG_LEVEL} \
-c "/app/husky_directory/gunicorn.conf.py" \
--reload \
--capture-output \
"husky_directory.app:create_app()"
27 changes: 22 additions & 5 deletions husky_directory/gunicorn.conf.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
import gevent.monkey

gevent.monkey.patch_all()

worker_class = "gevent"
import os
from multiprocessing import cpu_count


def worker_exit(worker, server):
worker.log.info(f"Server {server} shutting down . . .")


def max_workers():
default_max = 2 * cpu_count() + 1
return os.environ.get("GUNICORN_MAX_WORKERS", default_max)


def max_threads():
default_max = 4
return os.environ.get("GUNICORN_MAX_THREADS", default_max)


max_requests = 1000
bind = "0.0.0.0:8000"
worker_class = "gthread"
workers = max_workers()
threads = max_threads()
loglevel = os.environ.get("GUNICORN_LOG_LEVEL", "DEBUG")
reload = os.environ.get("FLASK_ENV") == "development"
preload_app = os.environ.get("FLASK_ENV") != "development"
33 changes: 11 additions & 22 deletions husky_directory/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,10 @@
"""
from __future__ import annotations

import json
import re
from types import SimpleNamespace
from typing import Any, Callable, Generic, Optional, TypeVar, Union
from typing import Any, Callable, Dict, Optional, Union

from pydantic import BaseModel, validator
from pydantic.generics import GenericModel


class UWDepartmentRole(BaseModel):
Expand Down Expand Up @@ -43,10 +40,7 @@ def validate(cls, v) -> RecordNamespace:
return v


T = TypeVar("T")


class RecordConstraint(GenericModel, Generic[T]):
class RecordConstraint(BaseModel):
"""
This model helps us apply additional client-side filters to
result records. A record can be an object or a dict.
Expand Down Expand Up @@ -92,13 +86,11 @@ def callback(record: PersonOutput):
assert data_2['foo']['bar'] == 'temp'
"""

namespace: Union[
RecordNamespace, str
] # Will always be converted to a RecordNamespace
predicate: Callable[[T], bool] = lambda _: False
failure_callback: Callable[[T], bool] = lambda _: False
namespace: Optional[Any]
predicate: Callable[[Any], bool] = lambda _: False
failure_callback: Callable[[Dict], bool] = lambda _: False

def resolve_namespace(self, record: Any) -> Optional[Any]:
def resolve_namespace(self, record: Dict) -> Optional[Any]:
"""
Given a record, resolves the value of the namespace. The record can be anything.
If it is a dict, the dict will be evaluated as namespace for the scope of this method.
Expand All @@ -109,21 +101,18 @@ def resolve_namespace(self, record: Any) -> Optional[Any]:
:param record: The thing you want to test; it can be anything!
:return: The value of the resolved namespace, or None.
"""
if isinstance(record, dict):
# Converts the dict into an object.
record = json.loads(
json.dumps(record), object_hook=lambda item: SimpleNamespace(**item)
)

resolved = record

for token in self.namespace.split("."):
resolved = getattr(resolved, token, None)
try:
resolved = resolved.get(token)
except AttributeError:
resolved = None
if not resolved:
break
return resolved

def matches(self, record: T) -> bool:
def matches(self, record: Dict) -> bool:
"""
Given a thing, returns whether the thing matches the predicate
or the failure callback.
Expand Down
4 changes: 2 additions & 2 deletions husky_directory/models/pws.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class PersonAffiliations(PWSBaseModel):


class NamedIdentity(PWSBaseModel):
display_name: Optional[str] = Field(..., alias="DisplayName")
display_name: Optional[str] = Field(...)
registered_name: Optional[str]
registered_surname: Optional[str]
registered_first_middle_name: Optional[str]
Expand Down Expand Up @@ -201,7 +201,7 @@ class PersonOutput(NamedIdentity):
@validator("href", always=True)
def populate_href(cls, href: Optional[str], values: Dict):
"""
While abbreviated surch results include the href, the full
While abbreviated search results include the href, the full
results do not, so it has to be generated.
"""
if not href:
Expand Down
174 changes: 97 additions & 77 deletions husky_directory/services/pws.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections import namedtuple
from functools import partial
from logging import Logger
from typing import Any, Dict, List, Optional, Type, TypeVar, Union, cast
from typing import Any, Dict, List, Optional, Type, TypeVar, Union

import requests
from flask_injector import request
Expand All @@ -27,8 +27,76 @@
)


def clear_namespace(namespace, record: Any) -> bool:
"""
This callback allows attributes to be cleared if they
do not match the criteria, instead of throwing away the entire record.
Practically: if the user has opted out of employee or student publication,
then we null the respective attributes.
"""
ns = namespace.split(".")
attr = ns.pop(
-1
) # Allows us to resolve to the level _above_ the field being cleared
constraint = RecordConstraint.construct(namespace=".".join(ns))
field = constraint.resolve_namespace(record)
if field and attr in field:
del field[attr]
return True


@request
class PersonWebServiceClient:
_GLOBAL_CONSTRAINTS = [
RecordConstraint(
# If the identity has no netid, we are not interested
# in the record.
namespace="UWNetID",
predicate=bool,
),
RecordConstraint(
# If the identity is not eligible for publication,
# we are not interested in the record.
namespace="WhitepagesPublish",
predicate=bool,
),
RecordConstraint(
# TODO: Support including test entities here,
# which will need a minor refactor of how that param
# is passed around. For now, we simply invert
# the bool representation: if we have a test entity,
# then we return False (and exclude the record).
namespace="IsTestEntity",
predicate=lambda v: not bool(v),
),
RecordConstraint(
# If there is a student affiliation and the affiliation's
# publication flag is true, then accept it;
# otherwise clear the affiliation from the record and
# continue processing.
namespace=(
"PersonAffiliations.StudentPersonAffiliation.StudentWhitePages.PublishInDirectory"
),
predicate=bool,
failure_callback=partial(
clear_namespace, "PersonAffiliations.StudentPersonAffiliation"
),
),
RecordConstraint(
# If there is an employee affiliation and the affiliation's
# publication flag is true, then accept it;
# otherwise clear the affiliation from the record and
# continue processing.
namespace=(
"PersonAffiliations.EmployeePersonAffiliation.EmployeeWhitePages.PublishInDirectory"
),
predicate=bool,
failure_callback=partial(
clear_namespace, "PersonAffiliations.EmployeePersonAffiliation"
),
),
]

@inject
def __init__(
self,
Expand Down Expand Up @@ -75,46 +143,56 @@ def _get_sanitized_request_output(
:return: The output; always returns a value, unless PWS raises an error.
"""
constraints = constraints or self.global_constraints
output = self._get_search_request_output(url, params, output_type)
output = self._get_search_request_output(url, params)
if output_type is ListPersonsOutput:
output = cast(ListPersonsOutput, output)
# Post-process the output to remove items that do not meet
# Pre-process the output to remove items that do not meet
# the provided client-side constraints.
output.persons = list(
filter(partial(self._filter_output_item, constraints), output.persons)
output["Persons"] = list(
filter(
partial(self._filter_output_item, constraints), output["Persons"]
)
)
elif output_type is PersonOutput:
output = cast(PersonOutput, output)
if not self._filter_output_item(constraints, output):
raise NotFound
output = output_type.parse_obj(output)
return output

def _get_search_request_output(
self,
url: str,
params: Optional[Dict] = None,
output_type: Type[OutputReturnType] = ListPersonsOutput,
) -> OutputReturnType:
) -> Dict:
response = requests.get(
url, cert=self.cert, params=params, headers={"Accept": "application/json"}
)
self.logger.info(f"[GET] {response.url} : {response.status_code}")
response.raise_for_status()
data = response.json()
output = output_type.parse_obj(data)
return output
return data

def _filter_output_item(
self, constraints: List[RecordConstraint], target: PersonOutput
self,
constraints: List[RecordConstraint],
target: Dict,
) -> bool:
if target.affiliations.student and not self.auth.request_is_authenticated:
target.affiliations.student = None
affiliations: Dict = target.get("PersonAffiliations")

if affiliations and not self.auth.request_is_authenticated:
# If the request is not authenticated, trash student
# data here and now, before it gets exported into
# a model.
affiliations.pop("StudentPersonAffiliation", None)
if not affiliations.get("EmployeePersonAffiliation"):
affiliations.pop("EmployeePersonAffiliation", None)

for constraint in constraints:
if not constraint.matches(target):
return False
# Don't use elif here, because the first
# predicate (above) may modify the dictionary, so we
# must re-check that the dict is populated.
if not affiliations:
return False

return bool(target.affiliations.student) or bool(target.affiliations.employee)
return all(constraint.matches(target) for constraint in constraints)

def get_explicit_href(
self, page_url: str, output_type: Type[OutputReturnType] = ListPersonsOutput
Expand All @@ -138,65 +216,7 @@ def global_constraints(self) -> List[RecordConstraint]:
"""
These constraints are applied to every public request method in this application.
"""

def clear_namespace(namespace, record: Any) -> bool:
"""
This callback allows attributes to be cleared if they
do not match the criteria, instead of throwing away the entire record.
Practically: if the user has opted out of employee or student publication,
then we null the respective attributes.
"""
ns = namespace.split(".")
attr = ns.pop(
-1
) # Allows us to resolve to the level _above_ the field being cleared
constraint = RecordConstraint.construct(namespace=".".join(ns))
field = constraint.resolve_namespace(record)
if field:
setattr(field, attr, None)
return True

return [
RecordConstraint(
# If the identity has no netid, we are not interested
# in the record.
namespace="netid",
predicate=bool,
),
RecordConstraint(
# If the identity is not eligible for publication,
# we are not interested in the record.
namespace="whitepages_publish",
predicate=bool,
),
RecordConstraint(
# TODO: Support including test entities here,
# which will need a minor refactor of how that param
# is passed around. For now, we simply invert
# the bool representation: if we have a test entity,
# then we return False (and exclude the record).
namespace="is_test_entity",
predicate=lambda v: not bool(v),
),
RecordConstraint(
# If there is a student affiliation and the affiliation's
# publication flag is true, then accept it;
# otherwise clear the affiliation from the record and
# continue processing.
namespace="affiliations.student.directory_listing.publish_in_directory",
predicate=bool,
failure_callback=partial(clear_namespace, "affiliations.student"),
),
RecordConstraint(
# If there is an employee affiliation and the affiliation's
# publication flag is true, then accept it;
# otherwise clear the affiliation from the record and
# continue processing.
namespace="affiliations.employee.directory_listing.publish_in_directory",
predicate=bool,
failure_callback=partial(clear_namespace, "affiliations.employee"),
),
]
return self._GLOBAL_CONSTRAINTS

def list_persons(self, request_input: ListPersonsInput) -> ListPersonsOutput:
"""
Expand Down
Loading

0 comments on commit d8129bf

Please sign in to comment.