Commit

Merge pull request #16 from JustinFrizzell/additional-integration-testing

Additional integration testing
JustinFrizzell authored Jan 14, 2024
2 parents 1a914f9 + 12e75ee commit 0ea518d
Showing 7 changed files with 132 additions and 29 deletions.
1 change: 0 additions & 1 deletion .dockerignore
@@ -22,7 +22,6 @@ dist/

# Logs and Databases
*.log
*.sql
*.sqlite

# Operating System Specific
1 change: 0 additions & 1 deletion .gitignore
@@ -22,7 +22,6 @@ dist/

# Logs and Databases
*.log
*.sql
*.sqlite

# Operating System Specific
13 changes: 11 additions & 2 deletions setup.py
@@ -3,7 +3,7 @@

setuptools.setup(
name="sqlconnect",
version="0.3.0",
version="0.3.1",
author="Justin Frizzell",
description="Simplifies connections to SQL databases for data analysts. Populate DataFrames with the results of queries directly from .sql files.",
long_description=Path("README.md").read_text(encoding="utf-8"),
@@ -14,7 +14,16 @@
"Source": "https://github.com/JustinFrizzell/sqlconnect",
},
packages=setuptools.find_packages(exclude=["tests", "tests.*"]),
install_requires=["pandas", "sqlalchemy", "pyyaml", "python-dotenv", "pyodbc", "psycopg2-binary", "oracledb", "PyMySQL"],
install_requires=[
"pandas",
"sqlalchemy",
"pyyaml",
"python-dotenv",
"pyodbc",
"psycopg2-binary",
"oracledb",
"PyMySQL",
],
classifiers=[
"License :: OSI Approved :: MIT License",
"Intended Audience :: Developers",
107 changes: 82 additions & 25 deletions sqlconnect/connector.py
@@ -77,7 +77,14 @@ def __init__(
self.engine = sqlalchemy.create_engine(self.__database_url)

def sql_to_df(
self, query_path: str, **kwargs
self,
query_path: str,
index_col=None,
coerce_float=True,
params=None,
parse_dates=None,
chunksize=None,
dtype=None,
) -> Union[pd.DataFrame, Generator[pd.DataFrame, None, None]]:
"""
Execute a SQL query from a file and return the results in a pandas DataFrame.
@@ -88,10 +95,19 @@ def sql_to_df(
----------
query_path : str
The file path of the SQL query to be executed.
**kwargs
Additional keyword arguments to be passed directly to pandas.read_sql_query.
This can include parameters like 'chunksize', 'parse_dates', etc.
index_col : str or list of str, optional, default: None
Column(s) to set as index(MultiIndex).
coerce_float : bool, default True
Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point.
params : list, tuple or dict, optional, default: None
List of parameters to pass to execute method.
parse_dates : list or dict, default: None
- List of column names to parse as dates.
- Dict of {column_name: format string} where format string is strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps.
chunksize : int, optional
Return Pandas DataFrames as a generator.
dtype : Type name or dict of column -> type, optional
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
Returns
-------
@@ -102,27 +118,45 @@
Raises
------
RuntimeError
If there is an error in executing the query.
If the file cannot be found or if there is an error in executing the query.
TypeError
If the provided query_path is not a string
Examples
--------
>>> # This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
>>> df = connection.sql_to_df("path/to/sql_query.sql", chunksize=1000)
This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
"""
if not isinstance(str(query_path), str):
if not isinstance(query_path, str):
raise TypeError("query_path must be a string")

try:
query = Path(str(query_path)).read_text(encoding="utf-8")
return pd.read_sql_query(sql=query, conn=self.engine, **kwargs)
full_path = Path(query_path).resolve()
query = full_path.read_text(encoding="utf-8")
return pd.read_sql_query(
sql=query,
con=self.engine,
index_col=index_col,
coerce_float=coerce_float,
params=params,
parse_dates=parse_dates,
chunksize=chunksize,
dtype=dtype,
)
except FileNotFoundError:
raise RuntimeError(f"File not found at: {full_path}")
except Exception as e:
raise RuntimeError(f"Error executing query: {e}")

def sql_to_df_str(
self, query: str, **kwargs
self,
query: str,
index_col=None,
coerce_float=True,
params=None,
parse_dates=None,
chunksize=None,
dtype=None,
) -> Union[pd.DataFrame, Generator[pd.DataFrame, None, None]]:
"""
Execute a SQL query from a string and return the results in a pandas DataFrame.
@@ -133,10 +167,19 @@
----------
query : str
The SQL query to be executed.
**kwargs
Additional keyword arguments to be passed directly to pandas.read_sql_query.
This can include parameters like 'chunksize', 'parse_dates', etc.
index_col : str or list of str, optional, default: None
Column(s) to set as index(MultiIndex).
coerce_float : bool, default True
Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point.
params : list, tuple or dict, optional, default: None
List of parameters to pass to execute method.
parse_dates : list or dict, default: None
- List of column names to parse as dates.
- Dict of {column_name: format string} where format string is strftime compatible in case of parsing string times or is one of (D, s, ns, ms, us) in case of parsing integer timestamps.
chunksize : int, optional
Return Pandas DataFrames as a generator.
dtype : Type name or dict of column -> type, optional
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
Returns
-------
@@ -149,19 +192,28 @@
RuntimeError
If there is an error in executing the query.
TypeError
If the provided query_path is not a string
If the provided query is not a string
Examples
--------
>>> df = connection.sql_to_df_str("SELECT * FROM Company.Employees", chunksize=1000)
This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
>>> # This will execute the SQL query and return a DataFrame, fetching 1000 rows at a time.
>>> df = connection.sql_to_df_str("SELECT * FROM company.employees", chunksize=1000)
"""

if not isinstance(str(query), str):
if not isinstance(query, str):
raise TypeError("query must be a string")

try:
return pd.read_sql_query(str(query), self.engine, **kwargs)
return pd.read_sql_query(
sql=query,
con=self.engine,
index_col=index_col,
coerce_float=coerce_float,
params=params,
parse_dates=parse_dates,
chunksize=chunksize,
dtype=dtype,
)
except Exception as e:
raise RuntimeError(f"Error executing query: {e}")

@@ -176,19 +228,24 @@ def execute_sql(self, sql_path: str) -> None:
Raises
------
Exception
If there is an error in executing the command.
RuntimeError
If there is an error in reading the file or executing the SQL command.
This includes file not found errors and other general exceptions.
"""
with self.engine.connect() as connection:
trans = connection.begin()
try:
command = Path(sql_path).read_text(encoding="utf-8")
full_path = Path(sql_path).resolve()
command = full_path.read_text(encoding="utf-8")
command = text(command)
connection.execute(command)
trans.commit() # Explicitly commit the transaction
except FileNotFoundError:
trans.rollback() # Rollback in case of an error
raise RuntimeError(f"File not found at: {full_path}")
except Exception as e:
trans.rollback() # Rollback in case of an error
print(f"An error occurred: {e}")
raise RuntimeError(f"An error occurred: {e}")

def execute_sql_str(self, command: str) -> None:
"""
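The widened sql_to_df and sql_to_df_str signatures above surface the pandas.read_sql_query options (index_col, coerce_float, params, parse_dates, chunksize, dtype) as explicit arguments instead of **kwargs, and file lookups now fail with a RuntimeError naming the resolved path. A minimal usage sketch, assuming the package is imported as sc and a connection named "Postgres" as in the integration tests; the query file and column names (hired_on, id) are hypothetical placeholders:

import sqlconnect as sc

conn = sc.Sqlconnector("Postgres")  # connection name taken from the integration tests

# File-based query; index_col and parse_dates are forwarded to pandas.read_sql_query.
# The query file and its columns are hypothetical placeholders.
df = conn.sql_to_df(
    "queries/employees_with_dates.sql",
    index_col="id",
    parse_dates=["hired_on"],
)

# String-based query; when chunksize is set, a generator of DataFrames is returned.
for chunk in conn.sql_to_df_str("SELECT * FROM public.employees", chunksize=1000):
    print(len(chunk))

# execute_sql now wraps missing files and execution failures in RuntimeError.
try:
    conn.execute_sql("tests/integration/sql/test_insert_record.sql")
except RuntimeError as exc:
    print(f"SQL execution failed: {exc}")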
1 change: 1 addition & 0 deletions tests/integration/sql/test_employees.sql
@@ -0,0 +1 @@
SELECT name FROM public.employees WHERE position = 'Data Engineer'
4 changes: 4 additions & 0 deletions tests/integration/sql/test_insert_record.sql
@@ -0,0 +1,4 @@
INSERT INTO public.employees
(id, "name", "position", database_url)
VALUES
(2, 'John Doe', 'Software Engineer', 'http://example.com/db/johndoe');
34 changes: 34 additions & 0 deletions tests/integration/test_postgres.py
@@ -48,3 +48,37 @@ def test_sql_to_df_str_postgres(setup_env, setup_connections):
expected = pd.DataFrame({"name": ["Jane Doe"]})

pd.testing.assert_frame_equal(df, expected)


def test_sql_to_df_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")

df = conn.sql_to_df("tests/integration/sql/test_employees.sql")

expected = pd.DataFrame({"name": ["Jane Doe"]})

pd.testing.assert_frame_equal(df, expected)


def test_execute_sql_str_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")
conn.execute_sql_str(
""" INSERT INTO public.employees
(id, "name", "position", database_url)
VALUES
(3, 'John S', 'Data Engineer', 'http://example.com/db/johns');
"""
)


def test_execute_sql_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")
conn.execute_sql("tests/integration/sql/test_insert_record.sql")


def test_df_to_sql_postgres(setup_env, setup_connections):
conn = sc.Sqlconnector("Postgres")

df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})

conn.df_to_sql(df, "table_name", if_exists="append", index=False)
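
The new FileNotFoundError handling in sql_to_df and execute_sql is not exercised by the tests above. A sketch of such a check, assuming pytest (which the fixture-style setup_env and setup_connections arguments suggest) and a deliberately non-existent path:

import pytest

import sqlconnect as sc


def test_sql_to_df_missing_file_postgres(setup_env, setup_connections):
    conn = sc.Sqlconnector("Postgres")

    # A missing query file should surface as RuntimeError, not a raw FileNotFoundError
    with pytest.raises(RuntimeError, match="File not found"):
        conn.sql_to_df("tests/integration/sql/does_not_exist.sql")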
