diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..44190e0
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,50 @@
+name: Python Build and Docker Image Build/Push
+
+on:
+  push:
+    branches:
+      - main # Trigger on push to the main branch
+  pull_request:
+    branches:
+      - main # Trigger on pull request to the main branch
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python 3.11.2
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.11.2
+          cache: 'pip' # caching pip dependencies
+
+      - name: Install dependencies
+        run: |
+          python3 -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Generate Token
+        id: generate-token
+        uses: tibdex/github-app-token@v2.1.0
+        with:
+          app_id: "${{ secrets.BOT_APP_ID }}"
+          private_key: "${{ secrets.BOT_APP_PRIVATE_KEY }}"
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: "unusualpseudo"
+          password: "${{ steps.generate-token.outputs.token }}"
+
+      - name: Build Docker image
+        run: |
+          docker build -t ghcr.io/unusualpseudo/stockbot:latest .
+
+      - name: Push Docker image
+        run: |
+          docker push ghcr.io/unusualpseudo/stockbot:latest
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f99d414
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/data/
+/logs/
+/venv/
+*.iml
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..73a4c3c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+# Use the Spark image as a base
+FROM spark:3.5.3-scala2.12-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY . /app
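+# Install Python dependencies into the image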
+RUN python3 -m pip install -r requirements.txt
+
+
+VOLUME /app/data
+VOLUME /app/logs
+
+# Run the analysis script and redirect output to a timestamped log file
+CMD ["sh", "-c", "python3 ./app/fundamental_analysis.py > /app/logs/fundamental_analysis_$(date +'%Y%m%d_%H%M%S').log 2>&1"]
diff --git a/app/fundamental_analysis.py b/app/fundamental_analysis.py
new file mode 100644
index 0000000..bc2c93b
--- /dev/null
+++ b/app/fundamental_analysis.py
@@ -0,0 +1,79 @@
+import yfinance as yf
+import pandas as pd
+from pyspark.sql import SparkSession
+
+# Function to collect financial data for a single stock
+def get_metrics(ticker):
+    stock = yf.Ticker(ticker)
+
+    # Get the latest closing price
+    stock_price = stock.history(period="1mo")['Close'].iloc[-1]
+
+    # Get EPS (earnings per share)
+    eps = stock.info.get('trailingEps')
+
+    # Get P/E ratio
+    pe_ratio = None
+    if eps:
+        pe_ratio = round(stock_price / eps, 2)
+
+    # Get P/B ratio
+    book_value = stock.info.get('bookValue')
+    pb_ratio = None
+    if book_value:
+        pb_ratio = round(stock_price / book_value, 2)
+
+    # Get return on equity (ROE)
+    roe = stock.info.get('returnOnEquity')
+    if roe is not None:
+        roe = round(roe * 100, 2)  # Convert to a percentage
+
+    # Return the collected data as a dictionary
+    return {
+        'Ticker': ticker,
+        'Stock_Price': stock_price,
+        'EPS': eps,
+        'PE_Ratio': pe_ratio,
+        'PB_Ratio': pb_ratio,
+        'ROE_Percentage': roe,
+    }
+
+
+# Function to collect data for multiple stocks
+def collect_metrics(tickers):
+    data = []
+    for ticker in tickers:
+        stock_data = get_metrics(ticker)
+        data.append(stock_data)
+
+    # Create a DataFrame to store the results
+    df = pd.DataFrame(data)
+    return df
+
+
+# Example: collect data for multiple stock tickers
+tickers = ['AAPL', 'MSFT', 'GOOGL']  # Add more tickers as needed
+financial_data = collect_metrics(tickers)
+
+print(financial_data)
+
+# Initialize a Spark session with Delta Lake support
+spark = SparkSession.builder \
+    .appName("Stock Data to Delta Lake") \
+    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.1") \
+    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
+    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
+    .getOrCreate()
+
+# Convert the pandas DataFrame to a Spark DataFrame
+spark_df = spark.createDataFrame(financial_data)
+
+delta_table_path = "./data/"  # Resolves to /app/data inside the container, i.e. the mounted volume
+
+# Write the metrics to a Delta table, replacing any previous snapshot
+spark_df.write.format("delta").mode("overwrite").save(delta_table_path)
+
+# Read the table back as a sanity check
+delta_df = spark.read.format("delta").load(delta_table_path)
+
+delta_df.show()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0178332
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+yfinance==0.2.44
+pandas==2.2.3
+numpy==2.1.2
+matplotlib==3.9.2
+openai==1.52.0
+pyspark==3.5.3
+delta-spark==3.2.1
\ No newline at end of file