Skip to content

Commit

Permalink
feat(stockbot): init
Browse files Browse the repository at this point in the history
Signed-off-by: unusualpseudo <[email protected]>
  • Loading branch information
unusualpseudo committed Oct 21, 2024
0 parents commit 0b55e37
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 0 deletions.
50 changes: 50 additions & 0 deletions .github/workflows/cicd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
name: Python Build and Docker Image Build/Push

on:
  push:
    branches:
      - main  # Trigger on push to the main branch
  pull_request:
    branches:
      - main  # Trigger on pull request to the main branch

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python 3.11.2
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML cannot reinterpret the version as a number.
          python-version: "3.11.2"
          cache: 'pip'  # caching pip dependencies

      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Generate Token
        id: generate-token
        uses: tibdex/[email protected]
        with:
          app_id: "${{ secrets.BOT_APP_ID }}"
          private_key: "${{ secrets.BOT_APP_PRIVATE_KEY }}"

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build Docker image
        run: |
          docker build -t ghcr.io/unusualpseudo/stockbot:latest .

      - name: Push Docker image
        # Only push from real pushes to main; pull_request runs (especially
        # from forks) do not have registry write permission.
        if: github.event_name == 'push'
        run: |
          docker push ghcr.io/unusualpseudo/stockbot:latest
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Runtime output directories (also declared as Docker volumes)
/data/
/logs/
# Local Python virtual environment
/venv/
# IntelliJ IDEA module files
*.iml
20 changes: 20 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Use the Spark image as a base (Spark 3.5.3, Scala 2.12, Java 17 on Ubuntu)
FROM spark:3.5.3-scala2.12-java17-ubuntu

USER root

# Install Python 3 + pip; skip recommended packages and drop the apt cache
# to keep the layer small.
RUN set -ex; \
    apt-get update; \
    apt-get install -y --no-install-recommends python3 python3-pip; \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements.txt on its own first so the dependency-install layer is
# cached and only rebuilt when requirements change, not on every code edit.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Now copy the rest of the source tree.
COPY . /app

VOLUME /app/data
VOLUME /app/logs

# Command to run the Python script and redirect output to a log file with a timestamp
CMD ["sh", "-c", "python3 ./app/fundamental_analysis.py > /app/logs/fundamental_analysis_$(date +'%Y%m%d_%H%M%S').log 2>&1"]
76 changes: 76 additions & 0 deletions app/fundamental_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import yfinance as yf
import pandas as pd
from pyspark.sql import SparkSession

# Function to collect financial data for a single stock
def get_metrics(ticker):
    """Fetch valuation metrics for one stock via yfinance.

    Parameters
    ----------
    ticker : str
        Stock symbol as understood by Yahoo Finance (e.g. 'AAPL').

    Returns
    -------
    dict
        Keys: 'Ticker', 'Stock_Price', 'EPS', 'PE_Ratio', 'PB_Ratio',
        'ROE_Percentage'. A field is None when the underlying input is
        missing (or zero, for the ratio denominators).
    """
    stock = yf.Ticker(ticker)

    # Fetch the info mapping once and reuse it for all lookups.
    info = stock.info

    # Last closing price over the past month. history() can return an
    # empty frame (e.g. delisted or unknown ticker); the original
    # unconditional .iloc[-1] raised IndexError in that case.
    history = stock.history(period="1mo")
    stock_price = None
    if not history.empty:
        stock_price = history['Close'].iloc[-1]

    # Get EPS (Earnings per Share); may be absent for some tickers.
    eps = info.get('trailingEps')

    # P/E ratio: price / EPS. Truthiness check also skips eps == 0,
    # avoiding a ZeroDivisionError.
    pe_ratio = None
    if stock_price is not None and eps:
        pe_ratio = round(stock_price / eps, 2)

    # P/B ratio: price / book value, with the same missing/zero guards.
    book_value = info.get('bookValue')
    pb_ratio = None
    if stock_price is not None and book_value:
        pb_ratio = round(stock_price / book_value, 2)

    # Return on Equity (ROE), converted from a fraction to a percentage.
    roe = info.get('returnOnEquity')
    if roe is not None:
        roe = round(roe * 100, 2)

    # Return the collected data as a dictionary
    return {
        'Ticker': ticker,
        'Stock_Price': stock_price,
        'EPS': eps,
        'PE_Ratio': pe_ratio,
        'PB_Ratio': pb_ratio,
        'ROE_Percentage': roe,
    }


# Function to collect data for multiple stocks
def collect_metrics(tickers):
    """Collect per-ticker metrics and tabulate them.

    Parameters
    ----------
    tickers : iterable of str
        Stock symbols to fetch.

    Returns
    -------
    pandas.DataFrame
        One row per ticker, columns as produced by get_metrics().
    """
    # One metrics dict per ticker, in input order.
    rows = [get_metrics(symbol) for symbol in tickers]
    return pd.DataFrame(rows)


# Example: Collect data for multiple stock tickers.
# NOTE(review): this runs at import time and performs network calls to
# Yahoo Finance via yfinance.
tickers = ['AAPL', 'MSFT', 'GOOGL'] # Add more tickers as needed
financial_data = collect_metrics(tickers)

# Show the collected metrics (a pandas DataFrame) on stdout.
print(financial_data)

# Initialize Spark session with Delta support:
# - spark.jars.packages fetches the Delta Lake artifact at startup
# - the extensions/catalog settings register Delta's SQL integration
spark = SparkSession.builder \
    .appName("Stock Data to Delta Lake") \
    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.1") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()

# Convert the pandas DataFrame into a Spark DataFrame for Delta output.
spark_df = spark.createDataFrame(financial_data)

# Output path for the Delta table, relative to the working directory.
# NOTE(review): the Dockerfile sets WORKDIR /app and mounts /app/data, but
# '../data/' relative to /app resolves outside that volume — confirm the
# intended working directory / path.
delta_table_path = "../data/"

# Overwrite any existing table at the path with this run's snapshot.
spark_df.write.format("delta").mode("overwrite").save(delta_table_path)

# Read the table back and display it as a round-trip sanity check.
delta_df = spark.read.format("delta").load(delta_table_path)

delta_df.show()
7 changes: 7 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
yfinance==0.2.44  # Yahoo Finance market-data client
pandas==2.2.3     # tabular data handling
numpy==2.1.2      # numeric arrays (pandas dependency)
matplotlib==3.9.2 # plotting
openai==1.52.0    # OpenAI API client
pyspark==3.5.3    # Spark; matches the spark:3.5.3 Docker base image
delta-spark==3.2.1  # Delta Lake bindings; matches spark.jars.packages

0 comments on commit 0b55e37

Please sign in to comment.