From a133f4a9fc3e53777857399f0e15b6decf84f78f Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Mon, 28 Oct 2024 20:55:06 -0400 Subject: [PATCH] update comment --- app/api/crud.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index eab177c..92ca097 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -208,8 +208,9 @@ async def get( "session_type", "pipeline_name", ], - # Keep NaNs to ensure that when there are no pipeline_name values in the query result, - # we don't end up with an empty dataframe for pipeline_grouped_data + # We cannot drop NaNs here because sessions without pipelines (i.e., with empty values for pipeline_name) + # would otherwise be completely removed and in an extreme case where no matching sessions have pipeline info, + # we'd end up with an empty dataframe. dropna=False, ).agg( { @@ -236,9 +237,12 @@ async def get( if not pd.isnull(pname) } ) - # NOTE: This expects a pd.Series and will not work on a pd.DataFrame - # (pd.DataFrame.reset_index() doesn't have a "name" arg) - # See related https://github.com/pandas-dev/pandas/issues/55225 + # NOTE: The below function expects a pd.Series only. + # This can break if the result of the apply function is a pd.DataFrame + # (pd.DataFrame.reset_index() doesn't have a "name" arg), + # which can happen if the original dataframe being operated on is empty. + # For example, see https://github.com/neurobagel/api/issues/367. + # (Related: https://github.com/pandas-dev/pandas/issues/55225) .reset_index(name="completed_pipelines") )