Skip to content

Commit

Permalink
[EDS-606] [EDS-603] Improve query accuracy and efficiency (#120)
Browse files Browse the repository at this point in the history
* Support toggling experimental features by environment. Defaults to false.

* Add experimental feature to toggle search mode for testing the new search.

* Add an experimental search type to improve query efficiency

- Adds a new experimental search option to allow reviewers to toggle between "classic" and "experimental" search mode. This is not a permanent or customer-facing UX component.

- Implements a new search mode that can be selected; only has an effect when searching by name. This new mode reduces request times by about 60% and provides more accurate results.

* Add an experimental search type to improve query efficiency

- Adds a new experimental search option to allow reviewers to toggle between "classic" and "experimental" search mode. This is not a permanent or customer-facing UX component.

- Implements a new search mode that can be selected; only has an effect when searching by name. This new mode reduces request times by about 60% and provides more accurate results.

* [Bot] Update version to 2.0.12

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Thomas Thorogood and github-actions[bot] authored Oct 6, 2021
1 parent 2ba6a29 commit 3780d16
Show file tree
Hide file tree
Showing 23 changed files with 708 additions and 246 deletions.
3 changes: 2 additions & 1 deletion docker/husky-directory-base.dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
ARG BASE_VERSION=latest
FROM ghcr.io/uwit-iam/uw-saml-poetry:${BASE_VERSION} as poetry-base
RUN apt-get update && apt-get -y install gcc
ARG FINGERPRINT=""
WORKDIR /app
COPY poetry.lock pyproject.toml ./
ENV PATH="$POETRY_HOME/bin:$PATH" \
UW_HUSKY_DIRECTORY_BASE_FINGERPRINT=${FINGERPRINT}
RUN poetry install
RUN poetry install --no-root --no-interaction && apt-get -y remove gcc && apt-get -y autoremove
1 change: 1 addition & 0 deletions husky_directory/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ class ApplicationConfig(FlaskConfigurationSettings):
version: Optional[str] = Field(None, env="HUSKY_DIRECTORY_VERSION")
start_time: datetime = Field(datetime.now())
deployment_id: Optional[str] = Field(None, env="DEPLOYMENT_ID")
show_experimental: Optional[bool] = Field(None, env="SHOW_EXPERIMENTAL_FEATURES")

# Aggregated Settings
pws_settings: PWSSettings = PWSSettings()
Expand Down
15 changes: 11 additions & 4 deletions husky_directory/blueprints/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from werkzeug.exceptions import BadRequest, HTTPException
from werkzeug.local import LocalProxy

from husky_directory.app_config import ApplicationConfig
from husky_directory.models.search import (
DirectoryBaseModel,
Person,
Expand All @@ -33,6 +34,7 @@ class Config(DirectoryBaseModel.Config):
error: Optional[ErrorModel]
status_code: int = 200
uwnetid: Optional[str] = None
show_experimental: bool = False


@singleton
Expand All @@ -54,8 +56,10 @@ def __init__(self, logger: Logger, injector: Injector):
)

@staticmethod
def index(session: LocalProxy):
context = RenderingContext(uwnetid=session.get("uwnetid"))
def index(session: LocalProxy, settings: ApplicationConfig):
context = RenderingContext(
uwnetid=session.get("uwnetid"), show_experimental=settings.show_experimental
)
return (
render_template("views/index.html", **context.dict(exclude_none=True)),
200,
Expand Down Expand Up @@ -124,9 +128,12 @@ def search_listing(
service: DirectorySearchService,
logger: Logger,
session: LocalProxy,
settings: ApplicationConfig,
):
context = RenderingContext.construct()
context.uwnetid = session.get("uwnetid")
context = RenderingContext.construct(
uwnetid=session.get("uwnetid"),
show_experimental=settings.show_experimental,
)
try:
form_input = SearchDirectoryFormInput.parse_obj(request.form)
context.request_input = form_input
Expand Down
10 changes: 10 additions & 0 deletions husky_directory/models/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,13 @@ class PopulationType(Enum):
class ResultDetail(Enum):
full = "full"
summary = "summary"


class SearchType(Enum):
# Old-school cool. It's inefficient but
# very explicit.
classic = "classic"

# In active development, constantly striving to
# provide more relevant results, faster!
experimental = "experimental"
175 changes: 166 additions & 9 deletions husky_directory/models/pws.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,31 @@
"""
from __future__ import annotations

import re
from typing import Any, Dict, List, NoReturn, Optional

from inflection import humanize
from pydantic import BaseModel, Extra, Field, validator

from .base import DirectoryBaseModel
from .common import RecordConstraint, UWDepartmentRole
from .enum import AffiliationState
from ..util import camelize

_can_humanize_expr = re.compile("^[a-zA-Z]+$")


def humanize_(val: str) -> str:
"""
Don't use the humanize function for names with punctuation,
as humanize may make them worse. For instance "Anne-marie"
instead of "Anne-Marie".
"""
if re.fullmatch(_can_humanize_expr, val):
return humanize(val)
return val


class PWSBaseModel(BaseModel):
class Config:
Expand Down Expand Up @@ -173,6 +189,18 @@ class NamedIdentity(PWSBaseModel):
preferred_middle_name: Optional[str]
preferred_last_name: Optional[str] = Field(None, alias="PreferredSurname")

# These next fields are calculated on instantiation
# to make it easier to work with these names in
# meaningful ways. They are listed as optional
# because they aren't required to create the object,
# but they will always be populated during creation.
displayed_surname: Optional[str]
displayed_first_name: Optional[str]
displayed_middle_name: Optional[str]
name_tokens: List[str] = []
canonical_tokens: List[str] = []
sort_key: Optional[str]

@validator(
"display_name",
"registered_name",
Expand All @@ -181,19 +209,123 @@ class NamedIdentity(PWSBaseModel):
"preferred_first_name",
"preferred_middle_name",
"preferred_last_name",
always=True,
)
def humanize_name_field(cls, value: Optional[str]) -> Optional[str]:
def sanitize_name_fields(cls, value: Optional[str]) -> Optional[str]:
if value:
return " ".join(humanize(v) for v in value.split())
return " ".join(humanize_(v) for v in value.split())
return value

def get_displayed_surname(self):
display_name_tokens = self.display_name.split()
if self.preferred_last_name and self.preferred_last_name in display_name_tokens:
return self.preferred_last_name
elif self.registered_surname in display_name_tokens:
return self.registered_surname
return self.display_name.split()[-1]
@validator("displayed_surname", always=True)
def populate_displayed_surname(cls, v: Any, values: Dict):
"""
Returns the canonical surname for the identity, if there is one;
otherwise returns the last token in the user's display name.
"""
display_name = values.get("display_name")
preferred_last_name = values.get("preferred_last_name")
registered_surname = values.get("registered_surname")

if preferred_last_name and preferred_last_name in display_name:
return preferred_last_name
elif registered_surname and registered_surname in display_name:
return registered_surname

# This should only happen if we have dirty data.
# If nothing makes sense, we'll just assume the
# default of a one-token surname.
return display_name.split()[-1]

@validator("displayed_first_name", always=True)
def populate_displayed_first_name(cls, v: Any, values: Dict):
"""
Returns the canonical first name for the identity, if there is one;
otherwise returns the first token in the user's display name.
"""
display_name = values.get("display_name")
last_name = values.get("displayed_surname")
last_name_index = display_name.index(last_name)
first_middle = display_name[:last_name_index]
preferred_first_name = values.get("preferred_first_name")
registered_first_middle = values.get("registered_first_middle_name")

if preferred_first_name and preferred_first_name in first_middle:
return preferred_first_name
elif registered_first_middle and registered_first_middle in first_middle:
return first_middle.strip()

# This should only happen if we have dirty data.
# If nothing makes sense, we'll just assume the
# default of a one-token name.
return display_name.split()[0]

@validator("displayed_middle_name", always=True)
def populate_displayed_middle_name(cls, v: Any, values: Dict):
"""
Returns the canonical middle name for the identity, if
they have set a preferred middle name. Otherwise, returns
whatever part of the name is not the first name and is not the last name.
"""
preferred_middle_name = values.get("preferred_middle_name")
registered_first_middle_name = values.get("registered_first_middle_name")
displayed_first_name = values.get("displayed_first_name")
displayed_surname = values.get("displayed_surname")
display_name = values.get("display_name")

if preferred_middle_name and preferred_middle_name in display_name:
return preferred_middle_name

splice_index = len(displayed_first_name) + 1

if (
registered_first_middle_name
and displayed_first_name in registered_first_middle_name
):
middle_name = registered_first_middle_name[splice_index:]
else:
surname_index = display_name.index(displayed_surname)
middle_name = display_name[splice_index:surname_index]

if middle_name and middle_name in display_name:
return middle_name
return None

@validator("name_tokens", always=True)
def populate_name_tokens(cls, v: Any, values: Dict):
"""
Populates a name tokens field to be used for processing,
grouping together tokens that are multi-token name pieces.
For instance "Ana Mari Cauce" will return ["Ana Mari", "Cauce"]
"""
return [
values.get(field)
for field in (
"displayed_first_name",
"displayed_middle_name",
"displayed_surname",
)
if values.get(field)
]

@validator("canonical_tokens", always=True)
def populate_sorted_name_tokens(cls, v: Any, values: Dict):
tokens = values.get("name_tokens")
result = [tokens[0]]
if len(tokens) > 1:
result.insert(0, tokens[-1])
if len(tokens) > 2:
result.extend(tokens[1:-1])
return result

@validator("sort_key", always=True)
def populate_sort_key(cls, v: Any, values: Dict):
"""
Pre-calculates the sort key for the display name,
using the grouped name tokens.
"Ana Mari Cauce" becomes "Cauce Ana Mari"
"""
return " ".join(values.get("canonical_tokens"))


class PersonOutput(NamedIdentity):
Expand Down Expand Up @@ -248,3 +380,28 @@ class ListPersonsOutput(ListResponsesOutputWrapper):
)
previous: Optional[ListPersonsInput] = Field(None, description="See `next`")
request_statistics: Optional[ListPersonsRequestStatistics]


class ResultBucket(DirectoryBaseModel):
description: str
students: List[PersonOutput] = []
employees: List[PersonOutput] = []

# The relevance is an index value to help sort the
# buckets themselves. The lower the value, the closer
# to the beginning of a list of buckets this bucket will be.
relevance: int = 0

def add_person(self, pws_person: PersonOutput) -> NoReturn:
if pws_person.affiliations.employee:
self.employees.append(pws_person)
if pws_person.affiliations.student:
self.students.append(pws_person)

@property
def sorted_students(self) -> List[PersonOutput]:
return sorted(self.students, key=lambda p: p.sort_key)

@property
def sorted_employees(self) -> List[PersonOutput]:
return sorted(self.employees, key=lambda p: p.sort_key)
5 changes: 4 additions & 1 deletion husky_directory/models/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from husky_directory.models.base import DirectoryBaseModel
from husky_directory.models.common import UWDepartmentRole
from husky_directory.models.enum import PopulationType, ResultDetail
from husky_directory.models.enum import PopulationType, ResultDetail, SearchType

# There is no direct dependency on the model here, we only need it for type checking;
# this protects us from accidentally creating a cyclic dependency between the modules.
Expand Down Expand Up @@ -59,6 +59,7 @@ class SearchDirectoryFormInput(DirectoryBaseModel):
render_query: Optional[str]
render_population: Optional[PopulationType]
render_length: Optional[ResultDetail]
search_type: SearchType = SearchType.classic

include_test_identities: bool = False # Not currently supported

Expand Down Expand Up @@ -135,6 +136,7 @@ class SearchDirectoryInput(DirectoryBaseModel):
population: PopulationType = PopulationType.employees
include_test_identities: bool = False
person_href: Optional[str]
search_type: SearchType = SearchType.classic

@classmethod
def search_methods(cls) -> List[str]:
Expand Down Expand Up @@ -210,6 +212,7 @@ class Person(DirectoryBaseModel):
box_number: Optional[str]
departments: List[UWDepartmentRole] = []
sort_key: Optional[str]
populations: List[PopulationType] = []

href: str

Expand Down
Loading

0 comments on commit 3780d16

Please sign in to comment.