Showing 8 changed files with 71 additions and 42 deletions.
@@ -1 +1,6 @@
-"""We use this package to extract CSV files from S3 bucket and loads them into respective Opportunity Tables"""
+# pylint: disable=line-too-long
+"""
+We use this package to extract CSV files
+from an S3 bucket and load them into the
+respective Opportunity Tables.
+"""
analytics/src/analytics/integrations/extracts/load_opportunity_data.py (40 changes: 24 additions & 16 deletions)
@@ -1,49 +1,57 @@
 # pylint: disable=invalid-name, line-too-long
 """Loads opportunity tables with opportunity data from S3."""

 import logging
 import os
 from io import BytesIO
 from urllib.parse import urlparse
-import smart_open
+from contextlib import ExitStack
+
+import smart_open

 from analytics.integrations.etldb.etldb import EtlDb
 from analytics.integrations.extracts.constants import (
     MAP_TABLES_TO_COLS,
     OpportunityTables,
 )
-from analytics.integrations.extracts.s3_config import S3Config, get_s3_client
+from analytics.integrations.extracts.s3_config import S3Config

 logger = logging.getLogger(__name__)


 def extract_copy_opportunity_data() -> None:
-    """Instantiate Etldb class and calls _fetch_insert_opportunity_data with database connection object """
+    """
+    Instantiate the EtlDb class and call
+    _fetch_insert_opportunity_data with a database connection object.
+    """
     etldb_conn = EtlDb()
     _fetch_insert_opportunity_data(etldb_conn.connection())

     logger.info("Extract opportunity data completed successfully")


-def _fetch_insert_opportunity_data(conn: EtlDb.connection ) -> None:
-    """Streamlines opportunity tables from S3 and insert into corresponding tables in the database."""
+def _fetch_insert_opportunity_data(conn: EtlDb.connection) -> None:
+    """
+    Stream opportunity tables from S3 and
+    insert them into the corresponding tables in the database.
+    """
     s3_config = S3Config()

     with conn.begin():
         cursor = conn.connection.cursor()
         for table in OpportunityTables:
-            logger.info(f"Copying data for table: {table}")
+            logger.info("Copying data for table: %s", table)

             columns = MAP_TABLES_TO_COLS.get(table, [])
             s3_uri = f"s3://{s3_config.s3_opportunity_bucket}/{s3_config.s3_opportunity_file_path_prefix}/{table}.csv"
             query = f"""
                 COPY {f"{os.getenv("DB_SCHEMA")}.{table} ({', '.join(columns)})"}
                 FROM STDIN WITH (FORMAT CSV, DELIMITER ',', QUOTE '"', HEADER)
             """
-            # Open the S3 object for reading
-            with smart_open.open(s3_uri, 'r') as file:
-                with cursor.copy(query) as copy:
-                    while data := file.read():
-                        copy.write(data)
-
-            logger.info(f"Successfully loaded data for table: {table}")
+            with ExitStack() as stack:
+                file = stack.enter_context(smart_open.open(s3_uri, "r"))
+                copy = stack.enter_context(cursor.copy(query))
+
+                while data := file.read():
+                    copy.write(data)
+
+            logger.info("Successfully loaded data for table: %s", table)
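For context on the loading pattern itself: the cursor.copy(query) context manager in the diff matches psycopg 3's COPY API, which streams data to Postgres over COPY ... FROM STDIN. The following is a standalone sketch under that assumption; the DSN, source URI, and table and column names are hypothetical placeholders, not values from the repository:

    import psycopg
    import smart_open


    def load_csv_into_table(dsn: str, source_uri: str, table: str, columns: list[str]) -> None:
        """Stream a CSV from any smart_open-readable URI into a Postgres table."""
        # Identifiers are interpolated directly into the SQL, so they must
        # come from trusted configuration (as in the diff), never user input.
        copy_sql = (
            f"COPY {table} ({', '.join(columns)}) "
            "FROM STDIN WITH (FORMAT CSV, DELIMITER ',', QUOTE '\"', HEADER)"
        )
        with psycopg.connect(dsn) as conn, conn.cursor() as cur:
            with smart_open.open(source_uri, "r") as file, cur.copy(copy_sql) as copy:
                # Passing a size keeps the loop genuinely incremental; read()
                # with no argument would pull the whole object into memory.
                while data := file.read(64 * 1024):
                    copy.write(data)

One other small improvement in the commit: switching from f-string logging to %s placeholders defers string formatting until a handler actually emits the record, which is the idiom pylint's logging-fstring-interpolation check recommends.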