-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The primary key is now (CIK, Ticker, CompanyNameIssuer) as requested.
The ON CONFLICT clause has been removed, so new unique combinations will simply be inserted as new rows. If a combination (CIK, Ticker, CompanyNameIssuer) already exists, it will remain unchanged. Code variables and function names have been made more descriptive and consistent. The change_primary_key_to_cik.py file and the associated step in the GitHub Actions workflow have been removed.
- Loading branch information
1 parent
9de681a
commit 046802e
Showing
4 changed files
with
156 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
name: Update Database with SEC Data (CIK, Ticker, Exchange, Company Name) | ||
|
||
on: | ||
schedule: | ||
- cron: '30 23 * * *' # Runs every day at 23:30 UTC (GitHub Actions schedules use UTC) | ||
workflow_dispatch: # Allows manual triggering | ||
|
||
concurrency: | ||
group: database-update | ||
|
||
jobs: | ||
update-data: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v2 | ||
|
||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: '3.x' | ||
|
||
- name: Install dependencies | ||
run: pip install requests pandas gspread oauth2client | ||
|
||
- name: Pull SEC data and update repository | ||
run: python src/scripts/SEC_company_tickers_exchange.py | ||
|
||
- name: Update Database from JSON | ||
run: python src/scripts/update_db_from_json.py | ||
|
||
- name: Verify changes | ||
run: | | ||
echo "Checking for changes in data files..." | ||
git status | ||
- name: Commit and push changes | ||
run: | | ||
git config user.name "github-actions[bot]" | ||
git config user.email "github-actions[bot]@users.noreply.github.com" | ||
git add data/company_tickers_exchange.json data/Full_Database_Backend.db | ||
git commit -m "Updated database with SEC data" | ||
git push | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,35 +2,33 @@ | |
import os | ||
import time | ||
|
||
# URL of the JSON file | ||
url = "https://www.sec.gov/files/company_tickers_exchange.json" | ||
# URL for the SEC JSON data | ||
SEC_JSON_URL = "https://www.sec.gov/files/company_tickers_exchange.json" | ||
|
||
# Path to the data folder | ||
data_folder = "data" | ||
os.makedirs(data_folder, exist_ok=True) | ||
# Data folder and output file paths | ||
DATA_FOLDER = "data" | ||
os.makedirs(DATA_FOLDER, exist_ok=True) | ||
OUTPUT_FILE = os.path.join(DATA_FOLDER, "company_tickers_exchange.json") | ||
|
||
# Path to the output file | ||
output_file = os.path.join(data_folder, "company_tickers_exchange.json") | ||
|
||
# Set headers to mimic a browser request responsibly | ||
headers = { | ||
# HTTP headers that identify this application and provide contact info | ||
HEADERS = { | ||
"User-Agent": "MyAppName/1.0 ([email protected])" | ||
} | ||
|
||
# Rate limit parameters | ||
max_requests_per_second = 10 | ||
sleep_time = 1 / max_requests_per_second | ||
# Rate limit configuration | ||
MAX_REQUESTS_PER_SECOND = 10 | ||
SLEEP_TIME = 1 / MAX_REQUESTS_PER_SECOND | ||
|
||
# Function to download the JSON file | ||
def download_file(url, headers, output_file): | ||
def download_sec_data(url, headers, output_file): | ||
"""Download the SEC JSON data and save it to a file.""" | ||
response = requests.get(url, headers=headers) | ||
response.raise_for_status() # Check that the request was successful | ||
with open(output_file, "wb") as f: | ||
f.write(response.content) | ||
print(f"File saved to {output_file}") | ||
response.raise_for_status() # Raise an error if the request failed | ||
with open(output_file, "wb") as file: | ||
file.write(response.content) | ||
print(f"SEC data file saved to {output_file}") | ||
|
||
# Download the JSON file | ||
download_file(url, headers, output_file) | ||
# Download the JSON data | ||
download_sec_data(SEC_JSON_URL, HEADERS, OUTPUT_FILE) | ||
|
||
# Sleep to respect rate limit | ||
time.sleep(sleep_time) | ||
# Sleep to respect rate limits | ||
time.sleep(SLEEP_TIME) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters