Skip to content

Commit

Permalink
Implemented conflict resolution based on the number of filled cells when synchronizing data between Google Sheet and database.
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesAlfonse authored Dec 5, 2024
1 parent b752289 commit ad07419
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 52 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/Synchronize_Sheet_and_DB.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ name: Synchronize Google Sheet and Database
on:
schedule:
- cron: '0 0 * * *' # Runs every day at midnight
workflow_dispatch: # Allows manual triggering
workflow_dispatch: # Allows manual triggering

concurrency:
concurrency:
group: database-update

jobs:
Expand Down
63 changes: 31 additions & 32 deletions src/scripts/database_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,41 +52,23 @@ def update_database(self, data):
conn = sqlite3.connect(self.db_file_path)
cursor = conn.cursor()

# Fetch existing data from the database
cursor.execute('SELECT * FROM full_database_backend')
existing_rows = cursor.fetchall()
existing_data = {}
for row in existing_rows:
key = (row[18], row[0], row[2]) # CIK, Ticker, CompanyNameIssuer
existing_data[key] = row

for row in data:
# Ensure row has exactly 27 elements
row = row + [''] * (27 - len(row))
key = (row[18], row[0], row[2]) # CIK, Ticker, CompanyNameIssuer

if key in existing_data:
# Compare the number of non-empty cells
existing_row = existing_data[key]
sheet_non_empty = sum(1 for cell in row if cell.strip())
db_non_empty = sum(1 for cell in existing_row if cell and str(cell).strip())
# Get the key
CIK = row[18]
Ticker = row[0]
CompanyNameIssuer = row[2]
key = (CIK, Ticker, CompanyNameIssuer)

# Check if record exists in database
cursor.execute('''
SELECT * FROM full_database_backend WHERE CIK=? AND Ticker=? AND CompanyNameIssuer=?
''', key)
db_row = cursor.fetchone()

if sheet_non_empty > db_non_empty:
# Update the database with the Google Sheet row
cursor.execute('''
INSERT OR REPLACE INTO full_database_backend (
Ticker, Exchange, CompanyNameIssuer, TransferAgent, OnlinePurchase, DTCMemberNum, TAURL,
TransferAgentPct, IREmails, IRPhoneNum, IRCompanyAddress, IRURL, IRContactInfo, SharesOutstanding,
CUSIP, CompanyInfoURL, CompanyInfo, FullProgressPct, CIK, DRS, PercentSharesDRSd, SubmissionReceived,
TimestampsUTC, LearnMoreAboutDRS, CertificatesOffered, SandP500, IncorporatedIn
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', tuple(row))
print(f"Updated row in database for key {key} with data from Google Sheet.")
else:
# Keep the existing database row
print(f"Kept existing database row for key {key}.")
else:
# Insert the new row from Google Sheet into the database
if db_row is None:
# Record does not exist, insert new row
cursor.execute('''
INSERT INTO full_database_backend (
Ticker, Exchange, CompanyNameIssuer, TransferAgent, OnlinePurchase, DTCMemberNum, TAURL,
Expand All @@ -95,7 +77,24 @@ def update_database(self, data):
TimestampsUTC, LearnMoreAboutDRS, CertificatesOffered, SandP500, IncorporatedIn
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', tuple(row))
print(f"Inserted new row into database for key {key}.")
else:
# Record exists, compare number of filled cells
db_row_values = list(db_row)
sheet_filled = sum(1 for cell in row if cell.strip())
db_filled = sum(1 for cell in db_row_values if cell and str(cell).strip())
if sheet_filled > db_filled:
# Sheet has more data, update the database
cursor.execute('''
REPLACE INTO full_database_backend (
Ticker, Exchange, CompanyNameIssuer, TransferAgent, OnlinePurchase, DTCMemberNum, TAURL,
TransferAgentPct, IREmails, IRPhoneNum, IRCompanyAddress, IRURL, IRContactInfo, SharesOutstanding,
CUSIP, CompanyInfoURL, CompanyInfo, FullProgressPct, CIK, DRS, PercentSharesDRSd, SubmissionReceived,
TimestampsUTC, LearnMoreAboutDRS, CertificatesOffered, SandP500, IncorporatedIn
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
''', tuple(row))
else:
# Keep the database row as is
continue

conn.commit()
conn.close()
Expand Down
33 changes: 16 additions & 17 deletions src/scripts/google_sheet_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,32 @@ def update_google_sheet(self, worksheet_name, db_data):

updates = []
for row in db_data:
# Ensure row has exactly 27 elements
row = list(row)
row = list(row) # Convert tuple to list
row = [str(cell) if cell is not None else '' for cell in row]
# Ensure row has 27 elements
row = row + [''] * (27 - len(row))

CIK = row[18]
Ticker = row[0]
CompanyNameIssuer = row[2]
key = (CIK, Ticker, CompanyNameIssuer)
if key in key_to_row:
row_number = key_to_row[key]
# Get the existing row from Google Sheet
# Get the existing sheet row
sheet_row = worksheet.row_values(row_number)
sheet_row = sheet_row + [''] * (27 - len(sheet_row))

# Compare the number of non-empty cells
db_non_empty = sum(1 for cell in row if str(cell).strip())
sheet_non_empty = sum(1 for cell in sheet_row if cell.strip())

if db_non_empty > sheet_non_empty:
# Update the entire row in Google Sheet with data from database
cell_list = worksheet.range(row_number, 1, row_number, 27)
for i, cell in enumerate(cell_list):
cell.value = row[i] if row[i] else ''
updates.extend(cell_list)
print(f"Updated row {row_number} in Google Sheet for key {key} with data from database.")
# Compare number of filled cells
db_filled = sum(1 for cell in row if cell.strip())
sheet_filled = sum(1 for cell in sheet_row if cell.strip())
if db_filled > sheet_filled:
# Update the sheet row with db row
for i in range(27):
if sheet_row[i] != row[i]:
cell = gspread.Cell(row_number, i + 1, row[i])
updates.append(cell)
else:
# Keep the existing Google Sheet row
print(f"Kept existing Google Sheet row for key {key}.")
# Keep the sheet row as is
continue
else:
# Append new row
new_row = [row[i] if row[i] else '' for i in range(27)]
Expand Down
1 change: 0 additions & 1 deletion src/scripts/synchronize_google_sheet_and_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from google_sheet_utils import GoogleSheetHandler
from database_utils import DatabaseHandler


def main():
# Load credentials and environment variables
sheet_id = os.environ['SHEET_ID']
Expand Down

0 comments on commit ad07419

Please sign in to comment.