From 34804a2d286fadd223424093a97b02b2f705d397 Mon Sep 17 00:00:00 2001
From: Sid Murching
Date: Thu, 26 Sep 2024 12:17:12 -0400
Subject: [PATCH] Fix test

Signed-off-by: Sid Murching
---
 agent_app_sample_code/00_global_config.py        | 2 +-
 agent_app_sample_code/tests/test_file_loading.py | 5 +++++
 agent_app_sample_code/utils/file_loading.py      | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/agent_app_sample_code/00_global_config.py b/agent_app_sample_code/00_global_config.py
index 0f4493b..c4c43ed 100644
--- a/agent_app_sample_code/00_global_config.py
+++ b/agent_app_sample_code/00_global_config.py
@@ -39,7 +39,7 @@
 # UC Catalog & Schema where outputs tables/indexes are saved
 # By default, will use the current user name to create a unique UC catalog/schema & vector search endpoint
 # If this catalog/schema does not exist, you need create catalog/schema permissions.
-UC_CATALOG = f"smurching"
+UC_CATALOG = f"{user_name}_catalog"
 UC_SCHEMA = f"cookbook"
 
 ## UC Model name where the Agent's model is logged
diff --git a/agent_app_sample_code/tests/test_file_loading.py b/agent_app_sample_code/tests/test_file_loading.py
index 6c06b67..85abf1e 100644
--- a/agent_app_sample_code/tests/test_file_loading.py
+++ b/agent_app_sample_code/tests/test_file_loading.py
@@ -39,3 +39,8 @@ def test_load_files_to_df(spark, tmpdir):
         "content": "file2 content",
     }])
     pd.testing.assert_frame_equal(raw_pandas_df, expected_df)
+
+def test_load_files_to_df_throws_if_no_files(spark, tmpdir):
+    temp_dir = tmpdir.mkdir("files_subdir")
+    with pytest.raises(Exception, match="does not contain any files"):
+        load_files_to_df(spark, str(temp_dir))
diff --git a/agent_app_sample_code/utils/file_loading.py b/agent_app_sample_code/utils/file_loading.py
index 2cfb94c..dd4eabe 100644
--- a/agent_app_sample_code/utils/file_loading.py
+++ b/agent_app_sample_code/utils/file_loading.py
@@ -83,7 +83,7 @@ def load_files_to_df(
 
     # Load the raw riles
     raw_files_df = (
-        https://spark.apache.org/docs/latest/sql-data-sources-binaryFile.html.option("recursiveFileLookup", "true")
+        spark.read.format("binaryFile").option("recursiveFileLookup", "true")
        .load(source_path)
     )