From 79da61a3b9bb06065a544eefd9e89ad4cd283981 Mon Sep 17 00:00:00 2001
From: unusualpseudo
Date: Tue, 22 Oct 2024 00:13:54 +0200
Subject: [PATCH] feat(stockbot): init

Signed-off-by: unusualpseudo
---
 .github/workflows/ci.yaml   | 54 ++++++++++++++++++++++++++
 .gitignore                  |  4 ++
 Dockerfile                  | 20 ++++++++++
 README.md                   |  1 +
 app/fundamental_analysis.py | 76 +++++++++++++++++++++++++++++++++++++
 requirements.txt            |  7 ++++
 6 files changed, 162 insertions(+)
 create mode 100644 .github/workflows/ci.yaml
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 app/fundamental_analysis.py
 create mode 100644 requirements.txt

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..e9ea24a
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,54 @@
+name: CI
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+jobs:
+  ci:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      attestations: write
+      id-token: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f99d414
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/data/
+/logs/
+/venv/
+*.iml
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..73a4c3c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+# Use the Spark image as a base
+FROM spark:3.5.3-scala2.12-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY . /app
+RUN pip install -r requirements.txt
+
+
+VOLUME /app/data
+VOLUME /app/logs
+
+# Run the analysis script, redirecting output to a timestamped log file
+CMD ["sh", "-c", "python3 ./app/fundamental_analysis.py > /app/logs/fundamental_analysis_$(date +'%Y%m%d_%H%M%S').log 2>&1"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f1d0b6b
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+[![CI](https://github.com/unusualpseudo/stockbot/actions/workflows/ci.yaml/badge.svg)](https://github.com/unusualpseudo/stockbot/actions/workflows/ci.yaml)
\ No newline at end of file
diff --git a/app/fundamental_analysis.py b/app/fundamental_analysis.py
new file mode 100644
index 0000000..bc2c93b
--- /dev/null
+++ b/app/fundamental_analysis.py
@@ -0,0 +1,76 @@
+import yfinance as yf
+import pandas as pd
+from pyspark.sql import SparkSession
+
+# Function to collect financial data for a single stock
+def get_metrics(ticker):
+    stock = yf.Ticker(ticker)
+
+    # Get the most recent closing price
+    stock_price = stock.history(period="1mo")['Close'].iloc[-1]
+
+    # Get EPS (Earnings per Share)
+    eps = stock.info.get('trailingEps')
+
+    # Get P/E ratio
+    pe_ratio = None
+    if eps:
+        pe_ratio = round(stock_price / eps, 2)
+
+    # Get P/B ratio
+    book_value = stock.info.get('bookValue')
+    pb_ratio = None
+    if book_value:
+        pb_ratio = round(stock_price / book_value, 2)
+
+    # Get Return on Equity (ROE)
+    roe = stock.info.get('returnOnEquity')
+    if roe is not None:
+        roe = round(roe * 100, 2)  # Convert to percentage
+
+    # Return the collected data as a dictionary
+    return {
+        'Ticker': ticker,
+        'Stock_Price': stock_price,
+        'EPS': eps,
+        'PE_Ratio': pe_ratio,
+        'PB_Ratio': pb_ratio,
+        'ROE_Percentage': roe,
+    }
+
+
+# Function to collect data for multiple stocks
+def collect_metrics(tickers):
+    data = []
+    for ticker in tickers:
+        stock_data = get_metrics(ticker)
+        data.append(stock_data)
+
+    # Create a DataFrame to store the results
+    df = pd.DataFrame(data)
+    return df
+
+
+# Example: Collect data for multiple stock tickers
+tickers = ['AAPL', 'MSFT', 'GOOGL']  # Add more tickers as needed
+financial_data = collect_metrics(tickers)
+
+print(financial_data)
+
+# Initialize Spark session with Delta support
+spark = SparkSession.builder \
+    .appName("Stock Data to Delta Lake") \
+    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.1") \
+    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
+    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
+    .getOrCreate()
+
+spark_df = spark.createDataFrame(financial_data)
+
+delta_table_path = "./data"  # resolves to the /app/data volume when run from /app (see Dockerfile)
+
+spark_df.write.format("delta").mode("overwrite").save(delta_table_path)
+
+delta_df = spark.read.format("delta").load(delta_table_path)
+
+delta_df.show()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0178332
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+yfinance==0.2.44
+pandas==2.2.3
+numpy==2.1.2
+matplotlib==3.9.2
+openai==1.52.0
+pyspark==3.5.3
+delta-spark==3.2.1
\ No newline at end of file
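
A minimal follow-up sketch of how the Delta table written by app/fundamental_analysis.py could be inspected from a separate Spark session. It assumes the container's /app/data volume is mounted at ./data on the host (the path is illustrative) and that the pinned pyspark and delta-spark packages from requirements.txt are installed locally:

    from pyspark.sql import SparkSession

    # Same Delta Lake session settings as app/fundamental_analysis.py.
    spark = (
        SparkSession.builder
        .appName("inspect-stockbot-delta")
        .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.1")
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
        .getOrCreate()
    )

    # "./data" is an assumption: whichever host directory was mounted to the container's /app/data volume.
    metrics = spark.read.format("delta").load("./data")
    metrics.select("Ticker", "Stock_Price", "PE_Ratio", "PB_Ratio", "ROE_Percentage").show()

Since the job writes with mode("overwrite"), each run replaces the current snapshot, while earlier versions remain reachable through the Delta transaction log under ./data/_delta_log until they are vacuumed.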