Commit

Merge pull request #16 from JustinFrizzell/additional-integration-testing

Additional integration testing
JustinFrizzell authored Jan 14, 2024
2 parents 1a914f9 + 12e75ee commit 0ea518d
Showing 7 changed files with 132 additions and 29 deletions.
1 change: 0 additions & 1 deletion .dockerignore
@@ -22,7 +22,6 @@ dist/

# Logs and Databases
*.log
*.sql
*.sqlite

# Operating System Specific
1 change: 0 additions & 1 deletion .gitignore
@@ -22,7 +22,6 @@ dist/

# Logs and Databases
*.log
*.sql
*.sqlite

# Operating System Specific
13 changes: 11 additions & 2 deletions setup.py
@@ -3,7 +3,7 @@

setuptools.setup(
name="sqlconnect",
version="0.3.0",
version="0.3.1",
author="Justin Frizzell",
description="Simplifies connections to SQL databases for data analysts. Populate DataFrames with the results of queries directly from .sql files.",
long_description=Path("README.md").read_text(encoding="utf-8"),
@@ -14,7 +14,16 @@
"Source": "https://github.com/JustinFrizzell/sqlconnect",
},
packages=setuptools.find_packages(exclude=["tests", "tests.*"]),
install_requires=["pandas", "sqlalchemy", "pyyaml", "python-dotenv", "pyodbc", "psycopg2-binary", "oracledb", "PyMySQL"],
install_requires=[
"pandas",
"sqlalchemy",
"pyyaml",
"python-dotenv",
"pyodbc",
"psycopg2-binary",
"oracledb",
"PyMySQL",
],
classifiers=[
"License :: OSI Approved :: MIT License",
"Intended Audience :: Developers",
107 changes: 82 additions & 25 deletions sqlconnect/connector.py
@@ -77,7 +77,14 @@ def __init__(
self.engine = sqlalchemy.create_engine(self.__database_url)

def sql_to_df(
self, query_path: str, **kwargs
self,
query_path: str,
index_col=None,
coerce_float=True,
params=None,
parse_dates=None,
chunksize=None,
dtype=None,
) -> Union[pd.DataFrame, Generator[pd.DataFrame, None, None]]:
"""
Execute a SQL query from a file and return the results in a pandas DataFrame.
@@ -88,10 +95,19 @@ def sql_to_df(
----------
query_path : str
The file path of the SQL query to be executed.
**kwargs
Additional keyword arguments to be passed directly to pandas.read_sql_query.
This can include parameters like 'chunksize', 'parse_dates', etc.
index_col : str or list of str, optional, default: None
Column(s) to set as index(MultiIndex).
coerce_float : bool, default True
Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point.
params : list, tuple or dict, optional, default: None
List of parameters to pass to execute method.
parse_dates : list or dict, default: None
- List of column names to parse as dates.
- Dict of {column_name: format string} where format string is strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps.
chunksize : int, optional
Return Pandas DataFrames as a generator.
dtype : Type name or dict of column -> type, optional
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
Returns
-------
@@ -102,27 +118,45 @@
Raises
------
RuntimeError
If there is an error in executing the query.
If the file cannot be found or if there is an error in executing the query.
TypeError
If the provided query_path is not a string
Examples
--------
>>> # This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
>>> df = connection.sql_to_df("path/to/sql_query.sql", chunksize=1000)
This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
"""
if not isinstance(str(query_path), str):
if not isinstance(query_path, str):
raise TypeError("query_path must be a string")

try:
query = Path(str(query_path)).read_text(encoding="utf-8")
return pd.read_sql_query(sql=query, conn=self.engine, **kwargs)
full_path = Path(query_path).resolve()
query = full_path.read_text(encoding="utf-8")
return pd.read_sql_query(
sql=query,
con=self.engine,
index_col=index_col,
coerce_float=coerce_float,
params=params,
parse_dates=parse_dates,
chunksize=chunksize,
dtype=dtype,
)
except FileNotFoundError:
raise RuntimeError(f"File not found at: {full_path}")
except Exception as e:
raise RuntimeError(f"Error executing query: {e}")

def sql_to_df_str(
self, query: str, **kwargs
self,
query: str,
index_col=None,
coerce_float=True,
params=None,
parse_dates=None,
chunksize=None,
dtype=None,
) -> Union[pd.DataFrame, Generator[pd.DataFrame, None, None]]:
"""
Execute a SQL query from a string and return the results in a pandas DataFrame.
@@ -133,10 +167,19 @@
----------
query : str
The SQL query to be executed.
**kwargs
Additional keyword arguments to be passed directly to pandas.read_sql_query.
This can include parameters like 'chunksize', 'parse_dates', etc.
index_col : str or list of str, optional, default: None
Column(s) to set as index(MultiIndex).
coerce_float : bool, default True
Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point.
params : list, tuple or dict, optional, default: None
List of parameters to pass to execute method.
parse_dates : list or dict, default: None
- List of column names to parse as dates.
- Dict of {column_name: format string} where format string is strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps.
chunksize : int, optional
Return Pandas DataFrames as a generator.
dtype : Type name or dict of column -> type, optional
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
Returns
-------
@@ -149,19 +192,28 @@
RuntimeError
If there is an error in executing the query.
TypeError
If the provided query_path is not a string
If the provided query is not a string
Examples
--------
>>> df = connection.sql_to_df_str("SELECT * FROM Company.Employees", chunksize=1000)
This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
>>> # This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
>>> df = connection.sql_to_df_str("SELECT * FROM company.employees", chunksize=1000)
"""

if not isinstance(str(query), str):
if not isinstance(query, str):
raise TypeError("query must be a string")

try:
return pd.read_sql_query(str(query), self.engine, **kwargs)
return pd.read_sql_query(
sql=query,
con=self.engine,
index_col=index_col,
coerce_float=coerce_float,
params=params,
parse_dates=parse_dates,
chunksize=chunksize,
dtype=dtype,
)
except Exception as e:
raise RuntimeError(f"Error executing query: {e}")

@@ -176,19 +228,24 @@ def execute_sql(self, sql_path: str) -> None:
Raises
------
Exception
If there is an error in executing the command.
RuntimeError
If there is an error in reading the file or executing the SQL command.
This includes file not found errors and other general exceptions.
"""
with self.engine.connect() as connection:
trans = connection.begin()
try:
command = Path(sql_path).read_text(encoding="utf-8")
full_path = Path(sql_path).resolve()
command = full_path.read_text(encoding="utf-8")
command = text(command)
connection.execute(command)
trans.commit() # Explicitly commit the transaction
except FileNotFoundError:
trans.rollback() # Rollback in case of an error
raise RuntimeError(f"File not found at: {full_path}")
except Exception as e:
trans.rollback() # Rollback in case of an error
print(f"An error occurred: {e}")
raise RuntimeError(f"An error occurred: {e}")

def execute_sql_str(self, command: str) -> None:
"""
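The widened sql_to_df and sql_to_df_str signatures above surface the pandas.read_sql_query options (index_col, coerce_float, params, parse_dates, chunksize, dtype) as explicit arguments instead of **kwargs, and file lookups now fail with a RuntimeError naming the resolved path. A minimal usage sketch, assuming the package is imported as sc and a connection named "Postgres" as in the integration tests; the query file and column names (hired_on, id) are hypothetical placeholders:

import sqlconnect as sc

conn = sc.Sqlconnector("Postgres")  # connection name taken from the integration tests

# File-based query; index_col and parse_dates are forwarded to pandas.read_sql_query.
# The query file and its columns are hypothetical placeholders.
df = conn.sql_to_df(
    "queries/employees_with_dates.sql",
    index_col="id",
    parse_dates=["hired_on"],
)

# String-based query; when chunksize is set, a generator of DataFrames is returned.
for chunk in conn.sql_to_df_str("SELECT * FROM public.employees", chunksize=1000):
    print(len(chunk))

# execute_sql now wraps missing files and execution failures in RuntimeError.
try:
    conn.execute_sql("tests/integration/sql/test_insert_record.sql")
except RuntimeError as exc:
    print(f"SQL execution failed: {exc}")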
1 change: 1 addition & 0 deletions tests/integration/sql/test_employees.sql
@@ -0,0 +1 @@
SELECT name FROM public.employees WHERE position = 'Data Engineer'
4 changes: 4 additions & 0 deletions tests/integration/sql/test_insert_record.sql
@@ -0,0 +1,4 @@
INSERT INTO public.employees
(id, "name", "position", database_url)
VALUES
(2, 'John Doe', 'Software Engineer', 'http://example.com/db/johndoe');
34 changes: 34 additions & 0 deletions tests/integration/test_postgres.py
@@ -48,3 +48,37 @@ def test_sql_to_df_str_postgres(setup_env, setup_connections):
expected = pd.DataFrame({"name": ["Jane Doe"]})

pd.testing.assert_frame_equal(df, expected)


def test_sql_to_df_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")

df = conn.sql_to_df("tests/integration/sql/test_employees.sql")

expected = pd.DataFrame({"name": ["Jane Doe"]})

pd.testing.assert_frame_equal(df, expected)


def test_execute_sql_str_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")
conn.execute_sql_str(
""" INSERT INTO public.employees
(id, "name", "position", database_url)
VALUES
(3, 'John S', 'Data Engineer', 'http://example.com/db/johns');
"""
)


def test_execute_sql_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")
conn.execute_sql("tests/integration/sql/test_insert_record.sql")


def test_df_to_sql_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")

df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})

conn.df_to_sql(df, "table_name", if_exists="append", index=False)
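
The new FileNotFoundError handling in sql_to_df and execute_sql is not exercised by the tests above. A sketch of such a check, assuming pytest (which the fixture-style setup_env and setup_connections arguments suggest) and a deliberately non-existent path:

import pytest

import sqlconnect as sc


def test_sql_to_df_missing_file_postgres(setup_env, setup_connections):
    conn = sc.Sqlconnector("Postgres")

    # A missing query file should surface as RuntimeError, not a raw FileNotFoundError
    with pytest.raises(RuntimeError, match="File not found"):
        conn.sql_to_df("tests/integration/sql/does_not_exist.sql")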
