[Issue #3685] save search get opportunity id #3704

Open: wants to merge 27 commits into base: main (showing changes from 22 commits)
7 changes: 6 additions & 1 deletion api/src/adapters/search/opensearch_client.py
@@ -216,13 +216,18 @@ def search(
     search_query: dict,
     include_scores: bool = True,
     params: dict | None = None,
+    includes: list[str] | None = None,
     excludes: list[str] | None = None,
 ) -> SearchResponse:
     if params is None:
         params = {}

     response = self._client.search(
-        index=index_name, body=search_query, params=params, _source_excludes=excludes
+        index=index_name,
+        body=search_query,
+        params=params,
+        _source_includes=includes,
+        _source_excludes=excludes,
     )
     return SearchResponse.from_opensearch_response(response, include_scores)
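The new `includes` parameter is passed straight through to OpenSearch's `_source_includes` source filtering, mirroring the existing `excludes`/`_source_excludes` pass-through. A minimal usage sketch; the index name and query body here are made up for illustration, not taken from this PR:

```python
# Illustrative only: assumes a configured SearchClient and an existing index.
response = search_client.search(
    "opportunity-index-alias",
    {"query": {"match_all": {}}},
    includes=["opportunity_id"],  # _source_includes: return only this field
    excludes=["attachments"],     # _source_excludes: skip heavy blobs
)
opportunity_ids = [record["opportunity_id"] for record in response.records]
```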

12 changes: 9 additions & 3 deletions api/src/api/users/user_routes.py
@@ -3,7 +3,8 @@

 import flask

-from src.adapters import db
+import src.adapters.search.flask_opensearch as flask_opensearch
+from src.adapters import db, search
 from src.adapters.db import flask_db
 from src.api import response
 from src.api.route_utils import raise_flask_error
@@ -26,6 +27,7 @@
 from src.auth.auth_utils import with_login_redirect_error_handler
 from src.auth.login_gov_jwt_auth import get_final_redirect_uri, get_login_gov_redirect_uri
 from src.db.models.user_models import UserSavedOpportunity, UserTokenSession
+from src.services.opportunities_v1.search_opportunities import search_opportunities_id
 from src.services.users.create_saved_search import create_saved_search
 from src.services.users.delete_saved_opportunity import delete_saved_opportunity
 from src.services.users.delete_saved_search import delete_saved_search
@@ -246,8 +248,9 @@ def user_get_saved_opportunities(db_session: db.Session, user_id: UUID) -> response.ApiResponse:
 @user_blueprint.doc(responses=[200, 401])
 @user_blueprint.auth_required(api_jwt_auth)
 @flask_db.with_db_session()
+@flask_opensearch.with_search_client()
 def user_save_search(
-    db_session: db.Session, user_id: UUID, json_data: dict
+    search_client: search.SearchClient, db_session: db.Session, user_id: UUID, json_data: dict
 ) -> response.ApiResponse:
     logger.info("POST /v1/users/:user_id/saved-searches")
@@ -257,8 +260,11 @@ def user_save_search(
     if user_token_session.user_id != user_id:
         raise_flask_error(401, "Unauthorized user")

+    # Retrieve opportunity IDs
+    opportunity_ids = search_opportunities_id(search_client, json_data["search_query"])
+
     with db_session.begin():
-        saved_search = create_saved_search(db_session, user_id, json_data)
+        saved_search = create_saved_search(db_session, user_id, json_data, opportunity_ids)
Collaborator:

Ideally, we'd have the following in this file:

with db_session.begin():
     saved_search = some_function_call(...)

I want to aim for as little logic as possible outside the service files - that makes the code more reusable (like how the search logic can be reused).

I'd just make fetching the opportunity IDs the first thing create_saved_search does.
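A minimal sketch of that suggestion, not the code in this diff: the service would need to be handed the search client as well, and the import paths below are assumed from the other files in this PR.

```python
# Sketch of the reviewer's proposed refactor for create_saved_search.py.
from uuid import UUID

from src.adapters import db, search
from src.db.models.user_models import UserSavedSearch
from src.services.opportunities_v1.search_opportunities import search_opportunities_id


def create_saved_search(
    search_client: search.SearchClient, db_session: db.Session, user_id: UUID, json_data: dict
) -> UserSavedSearch:
    # First step: resolve the saved query to its matching opportunity IDs.
    opportunity_ids = search_opportunities_id(search_client, json_data["search_query"])

    saved_search = UserSavedSearch(
        user_id=user_id,
        name=json_data["name"],
        search_query=json_data["search_query"],
        searched_opportunity_ids=opportunity_ids,
    )

    db_session.add(saved_search)
    return saved_search
```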

Collaborator (author):

Done


logger.info(
"Saved search for user",
36 changes: 32 additions & 4 deletions api/src/services/opportunities_v1/search_opportunities.py
@@ -154,7 +154,7 @@ def _add_aggregations(builder: search.SearchQueryBuilder) -> None:
     builder.aggregation_terms("agency", _adjust_field_name("agency_code"), size=1000)


-def _get_search_request(params: SearchOpportunityParams) -> dict:
+def _get_search_request(params: SearchOpportunityParams, aggregation: bool = True) -> dict:
     builder = search.SearchQueryBuilder()

     # Make sure total hit count gets counted for more than 10k records
@@ -176,8 +176,9 @@ def _get_search_request(params: SearchOpportunityParams) -> dict:
     # Filters
     _add_search_filters(builder, params.filters)

-    # Aggregations / Facet / Filter Counts
-    _add_aggregations(builder)
+    if aggregation:
+        # Aggregations / Facet / Filter Counts
+        _add_aggregations(builder)

     return builder.build()

@@ -186,7 +187,6 @@ def search_opportunities(
     search_client: search.SearchClient, raw_search_params: dict
 ) -> Tuple[Sequence[dict], dict, PaginationInfo]:
     search_params = SearchOpportunityParams.model_validate(raw_search_params)
-
     search_request = _get_search_request(search_params)

     index_alias = get_search_config().opportunity_search_index_alias
@@ -213,3 +213,31 @@
     records = SCHEMA.load(response.records, many=True)

     return records, response.aggregations, pagination_info
+
+
+def search_opportunities_id(search_client: search.SearchClient, search_query: dict) -> list:
+    # Override pagination when calling opensearch
+    updated_search_query = {
+        **search_query,
+        "pagination": {
+            "order_by": "post_date",
+            "page_offset": 1,
+            "page_size": 1000,
+            "sort_direction": "descending",
+        },
+    }

Collaborator:
Could you move the definition of the static pagination values to a constant at the top of the file?

A slightly briefer syntax that avoids the ** unpacking:

Suggested change:

-updated_search_query = {
-    **search_query,
-    "pagination": {
-        "order_by": "post_date",
-        "page_offset": 1,
-        "page_size": 1000,
-        "sort_direction": "descending",
-    },
-}
+updated_search_query = search_query | STATIC_PAGINATION
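For the `|` merge to be equivalent to the `**` version, the constant has to carry the "pagination" key itself so the right-hand side replaces any caller-supplied pagination. A minimal sketch; the constant's name comes from the comment, its placement at module top is the reviewer's ask:

```python
# Assumed module-level constant in search_opportunities.py.
STATIC_PAGINATION = {
    "pagination": {
        "order_by": "post_date",
        "page_offset": 1,
        "page_size": 1000,
        "sort_direction": "descending",
    }
}

# Dict union (Python 3.9+): keys from the right operand win on conflict.
updated_search_query = search_query | STATIC_PAGINATION
```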

Collaborator (author):
Done

+    search_params = SearchOpportunityParams.model_validate(updated_search_query)
+
+    search_request = _get_search_request(search_params, False)
+
+    index_alias = get_search_config().opportunity_search_index_alias
+    logger.info(
+        "Querying search index alias %s", index_alias, extra={"search_index_alias": index_alias}
+    )
+
+    response = search_client.search(
+        index_alias, search_request, includes=["opportunity_id"], excludes=["attachments"]
+    )
Collaborator:
This entire chunk is duplicated across this new implementation and the existing one (with just the aggregation flag different). Could you move it into some shared bit of code they both use? That way, if we need to make a change, it affects both without us missing one.

Collaborator (author):
Good point. Created a helper function, done.


+    return [opp["opportunity_id"] for opp in response.records]
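The helper the author mentions landed in a later commit, so it is not part of these 22 commits. A sketch of what the shared piece might look like; the name `_execute_search` and exact signature are assumptions:

```python
# Sketch of the shared "build request, log, query alias" helper.
def _execute_search(
    search_client: search.SearchClient,
    search_params: SearchOpportunityParams,
    aggregation: bool = True,
    includes: list[str] | None = None,
    excludes: list[str] | None = None,
):
    # Build the request (optionally without aggregations) and query the alias.
    search_request = _get_search_request(search_params, aggregation)

    index_alias = get_search_config().opportunity_search_index_alias
    logger.info(
        "Querying search index alias %s", index_alias, extra={"search_index_alias": index_alias}
    )

    return search_client.search(
        index_alias, search_request, includes=includes, excludes=excludes
    )
```

search_opportunities would then call it with the defaults, while search_opportunities_id would pass aggregation=False, includes=["opportunity_id"], excludes=["attachments"].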
6 changes: 4 additions & 2 deletions api/src/services/users/create_saved_search.py
@@ -7,12 +7,14 @@
 logger = logging.getLogger(__name__)


-def create_saved_search(db_session: db.Session, user_id: UUID, json_data: dict) -> UserSavedSearch:
+def create_saved_search(
+    db_session: db.Session, user_id: UUID, json_data: dict, opportunity_ids: list
+) -> UserSavedSearch:
     saved_search = UserSavedSearch(
         user_id=user_id,
         name=json_data["name"],
         search_query=json_data["search_query"],
-        searched_opportunity_ids=[],
+        searched_opportunity_ids=opportunity_ids,
     )

     db_session.add(saved_search)
41 changes: 37 additions & 4 deletions api/tests/src/api/users/test_user_save_search_post.py
@@ -1,8 +1,15 @@
+import uuid
+
 import pytest

+from src.api.opportunities_v1.opportunity_schemas import OpportunityV1Schema
 from src.constants.lookup_constants import FundingInstrument
 from src.db.models.user_models import UserSavedSearch
 from tests.src.api.opportunities_v1.conftest import get_search_request
+from tests.src.api.opportunities_v1.test_opportunity_route_search import (
+    NASA_INNOVATIONS,
+    NASA_SUPERSONIC,
+)
Collaborator:
Don't import across unit tests like this - if the setup data has any shared use, move it to the conftest files that are already shared between the unit tests.

Effectively, when we're talking about test setup, it should only go in one of three places (see the sketch below):

  • Top of the same test file - for when the setup only pertains to whatever we're testing in that file
  • conftest.py in the same directory - when multiple test files in the same directory want to reuse it
  • conftest.py in the root tests folder - when a lot of tests across the codebase want to use it
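As an illustration of the middle option, the shared records could live in a directory-level conftest.py. This is a sketch only; the fixture name, factory, and field are assumptions rather than code from this PR:

```python
# tests/src/api/opportunities_v1/conftest.py (sketch; names are assumptions)
import pytest

from tests.src.db.models.factories import OpportunityFactory  # assumed factory


@pytest.fixture(scope="module")
def nasa_opportunities(enable_factory_create):
    # Records any test file in this directory can load into the search index.
    return OpportunityFactory.create_batch(2, agency_code="NASA")
```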

Collaborator (author):
I did not think creating common test data in the conftest was appropriate. I created reusable opportunities for the test module.

from tests.src.db.models.factories import UserFactory


@@ -71,14 +78,35 @@ def test_user_save_search_post_invalid_request(client, user, user_auth_token, db_session):
     assert len(saved_searches) == 0


-def test_user_save_search_post(client, user, user_auth_token, enable_factory_create, db_session):
+def test_user_save_search_post(
+    client,
+    opportunity_index,
+    opportunity_index_alias,
+    search_client,
+    user,
+    user_auth_token,
+    enable_factory_create,
+    db_session,
+    monkeypatch,
+):
     # Test data
     search_name = "Test Search"
     search_query = get_search_request(
         funding_instrument_one_of=[FundingInstrument.GRANT],
-        agency_one_of=["LOC"],
+        agency_one_of=["NASA"],
     )

+    # Load into the search index
+    schema = OpportunityV1Schema()
+    json_records = [schema.dump(opp) for opp in [NASA_INNOVATIONS, NASA_SUPERSONIC]]
+    search_client.bulk_upsert(opportunity_index, json_records, "opportunity_id")
+
+    # Swap the search index alias
+    alias = f"test-user_save_search-index-alias-{uuid.uuid4().int}"
+    monkeypatch.setenv("OPPORTUNITY_SEARCH_INDEX_ALIAS", alias)
+
+    search_client.swap_alias_index(opportunity_index, alias)
Collaborator:
I'd recommend putting anything like this into a utility/fixture - it's something we'll likely want in several unit tests, and we shouldn't need to copy-paste chunks of code into each one.

We do already have a fixture for setting up the search index alias, but it's at the session level (shared across all tests); we might need to adjust it to also support other scopes.

Collaborator (author):
I did not want to change the existing session-scoped fixture, as it is used in other session-scoped fixtures, and creating another function-scoped fixture looked like overkill; that's why I did it this way. Do you think we should create a function-scoped fixture for the opportunity alias?

Collaborator:
Yeah, a function-scoped fixture would be fine. A module/class-level one might also be worth setting up, since we're usually fine with test setup for a whole file/class being reused across everything in that file/class (it's easier to follow and the tests run faster).

Collaborator (author):
Done
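A minimal sketch of such a function-scoped fixture, mirroring the inline setup above; it assumes the existing `search_client` and `opportunity_index` fixtures, and the fixture name is illustrative:

```python
# Sketch of a function-scoped alias fixture built on existing test fixtures.
import uuid

import pytest


@pytest.fixture
def opportunity_index_alias_for_test(search_client, opportunity_index, monkeypatch):
    # Point the app at a throwaway alias so each test sees only its own data.
    alias = f"test-opportunity-index-alias-{uuid.uuid4().int}"
    monkeypatch.setenv("OPPORTUNITY_SEARCH_INDEX_ALIAS", alias)
    search_client.swap_alias_index(opportunity_index, alias)
    return alias
```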


# Make the request to save a search
response = client.post(
f"/v1/users/{user.user_id}/saved-searches",
@@ -88,18 +116,23 @@ def test_user_save_search_post(client, user, user_auth_token, enable_factory_create, db_session):

     assert response.status_code == 200
     assert response.json["message"] == "Success"

     # Verify the search was saved in the database
     saved_search = db_session.query(UserSavedSearch).one()

     assert saved_search.user_id == user.user_id
     assert saved_search.name == search_name
     assert saved_search.search_query == {
         "format": "json",
-        "filters": {"agency": {"one_of": ["LOC"]}, "funding_instrument": {"one_of": ["grant"]}},
+        "filters": {"agency": {"one_of": ["NASA"]}, "funding_instrument": {"one_of": ["grant"]}},
         "pagination": {
             "order_by": "opportunity_id",
             "page_size": 25,
             "page_offset": 1,
             "sort_direction": "ascending",
         },
     }
+    # Verify pagination for the query was overwritten; searched_opportunity_ids should be ordered by "post_date"
+    assert saved_search.searched_opportunity_ids == [
+        NASA_SUPERSONIC.opportunity_id,
+        NASA_INNOVATIONS.opportunity_id,
+    ]