From ef781680ca31ce55e4518e3f467e20f57048fcde Mon Sep 17 00:00:00 2001 From: Ernesto Ruy Sanchez Date: Sat, 8 Feb 2025 10:30:56 -0800 Subject: [PATCH 1/4] validate tickers first --- src/main.py | 8 ++++++++ src/tools/api.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/main.py b/src/main.py index 078f6646..f60974ae 100644 --- a/src/main.py +++ b/src/main.py @@ -53,6 +53,14 @@ def run_hedge_fund( model_name: str = "gpt-4o", model_provider: str = "OpenAI", ): + # Validate tickers first + from tools.api import validate_tickers + + is_valid, invalid_tickers = validate_tickers(tickers) + if not is_valid: + print(f"{Fore.RED}Error: The following tickers are invalid: {', '.join(invalid_tickers)}{Style.RESET_ALL}") + return None + # Start progress tracking progress.start() diff --git a/src/tools/api.py b/src/tools/api.py index 2b2c9c01..9cdea719 100644 --- a/src/tools/api.py +++ b/src/tools/api.py @@ -280,3 +280,21 @@ def prices_to_df(prices: list[Price]) -> pd.DataFrame: def get_price_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame: prices = get_prices(ticker, start_date, end_date) return prices_to_df(prices) + + +def validate_tickers(tickers: list[str]) -> tuple[bool, list[str]]: + """ + Validate a list of tickers against the available tickers list. + Returns a tuple of (is_valid, invalid_tickers). 
+ """ + try: + response = requests.get("https://virattt.github.io/datasets/financials/available_tickers.json") + if response.status_code != 200: + raise Exception(f"Error fetching valid tickers: {response.status_code} - {response.text}") + + valid_tickers = {ticker["symbol"] for ticker in response.json()["tickers"]} + invalid_tickers = [ticker for ticker in tickers if ticker not in valid_tickers] + + return len(invalid_tickers) == 0, invalid_tickers + except Exception as e: + raise Exception(f"Error validating tickers: {str(e)}") From 50622d5d502a116c74452ae1c2870b63379a0240 Mon Sep 17 00:00:00 2001 From: Ernesto Ruy Sanchez Date: Sat, 8 Feb 2025 12:11:14 -0800 Subject: [PATCH 2/4] validate tickers from sec.gov url --- src/tools/api.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/tools/api.py b/src/tools/api.py index 9cdea719..33d96f43 100644 --- a/src/tools/api.py +++ b/src/tools/api.py @@ -284,16 +284,20 @@ def get_price_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame: def validate_tickers(tickers: list[str]) -> tuple[bool, list[str]]: """ - Validate a list of tickers against the available tickers list. + Validate a list of tickers against the SEC's official tickers list. Returns a tuple of (is_valid, invalid_tickers). 
""" try: - response = requests.get("https://virattt.github.io/datasets/financials/available_tickers.json") + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + response = requests.get("https://www.sec.gov/files/company_tickers.json", headers=headers) if response.status_code != 200: raise Exception(f"Error fetching valid tickers: {response.status_code} - {response.text}") - valid_tickers = {ticker["symbol"] for ticker in response.json()["tickers"]} - invalid_tickers = [ticker for ticker in tickers if ticker not in valid_tickers] + # SEC data is in format: {"0":{"cik_str":320193,"ticker":"AAPL","title":"Apple Inc."}, ...} + valid_tickers = {company_info['ticker'] for company_info in response.json().values()} + invalid_tickers = [ticker for ticker in tickers if ticker.upper() not in valid_tickers] return len(invalid_tickers) == 0, invalid_tickers except Exception as e: From 2b51136619123b2beb5cd72e0fff52171b8004cb Mon Sep 17 00:00:00 2001 From: Ernesto Ruy Sanchez Date: Sat, 8 Feb 2025 12:19:40 -0800 Subject: [PATCH 3/4] Revert "validate tickers from sec.gov url" This reverts commit 50622d5d502a116c74452ae1c2870b63379a0240. --- src/tools/api.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/tools/api.py b/src/tools/api.py index 33d96f43..9cdea719 100644 --- a/src/tools/api.py +++ b/src/tools/api.py @@ -284,20 +284,16 @@ def get_price_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame: def validate_tickers(tickers: list[str]) -> tuple[bool, list[str]]: """ - Validate a list of tickers against the SEC's official tickers list. + Validate a list of tickers against the available tickers list. Returns a tuple of (is_valid, invalid_tickers). 
""" try: - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - } - response = requests.get("https://www.sec.gov/files/company_tickers.json", headers=headers) + response = requests.get("https://virattt.github.io/datasets/financials/available_tickers.json") if response.status_code != 200: raise Exception(f"Error fetching valid tickers: {response.status_code} - {response.text}") - # SEC data is in format: {"0":{"cik_str":320193,"ticker":"AAPL","title":"Apple Inc."}, ...} - valid_tickers = {company_info['ticker'] for company_info in response.json().values()} - invalid_tickers = [ticker for ticker in tickers if ticker.upper() not in valid_tickers] + valid_tickers = {ticker["symbol"] for ticker in response.json()["tickers"]} + invalid_tickers = [ticker for ticker in tickers if ticker not in valid_tickers] return len(invalid_tickers) == 0, invalid_tickers except Exception as e: From eff27db1a3e99389c343b9151d5f45f3b2d91fb3 Mon Sep 17 00:00:00 2001 From: Ernesto Ruy Sanchez Date: Sat, 8 Feb 2025 12:23:33 -0800 Subject: [PATCH 4/4] ask if user wants to proceed with just valid symbols --- src/main.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/main.py b/src/main.py index f60974ae..eabbe0ca 100644 --- a/src/main.py +++ b/src/main.py @@ -53,14 +53,6 @@ def run_hedge_fund( model_name: str = "gpt-4o", model_provider: str = "OpenAI", ): - # Validate tickers first - from tools.api import validate_tickers - - is_valid, invalid_tickers = validate_tickers(tickers) - if not is_valid: - print(f"{Fore.RED}Error: The following tickers are invalid: {', '.join(invalid_tickers)}{Style.RESET_ALL}") - return None - # Start progress tracking progress.start() @@ -179,6 +171,28 @@ def create_workflow(selected_analysts=None): # Parse tickers from comma-separated string tickers = [ticker.strip() for ticker in args.tickers.split(",")] + # 
Validate tickers before proceeding + from tools.api import validate_tickers + is_valid, invalid_tickers = validate_tickers(tickers) + if not is_valid: + valid_tickers = [ticker for ticker in tickers if ticker not in invalid_tickers] + if not valid_tickers: + print(f"{Fore.RED}Error: All provided tickers are invalid: {', '.join(invalid_tickers)}{Style.RESET_ALL}") + sys.exit(1) + + print(f"{Fore.YELLOW}Warning: The following tickers are invalid: {', '.join(invalid_tickers)}{Style.RESET_ALL}") + proceed = questionary.confirm( + f"Do you want to proceed with only the valid tickers: {', '.join(valid_tickers)}?", + default=True + ).ask() + + if not proceed: + print("\nExiting...") + sys.exit(0) + + print(f"\nProceeding with tickers: {', '.join(valid_tickers)}\n") + tickers = valid_tickers + # Select analysts selected_analysts = None choices = questionary.checkbox( @@ -201,7 +215,7 @@ def create_workflow(selected_analysts=None): sys.exit(0) else: selected_analysts = choices - print(f"\nSelected analysts: {', '.join(Fore.GREEN + choice.title().replace('_', ' ') + Style.RESET_ALL for choice in choices)}\n") + print(f"\nSelected analysts: {', '.join(choice.title().replace('_', ' ') for choice in choices)}\n") # Select LLM model model_choice = questionary.select(