From 79da61a3b9bb06065a544eefd9e89ad4cd283981 Mon Sep 17 00:00:00 2001
From: unusualpseudo
Date: Tue, 22 Oct 2024 00:13:54 +0200
Subject: [PATCH] feat(stockbot): init

Signed-off-by: unusualpseudo
---
 .github/workflows/ci.yaml   | 54 ++++++++++++++++++++++++++
 .gitignore                  |  4 ++
 Dockerfile                  | 20 ++++++++++
 README.md                   |  1 +
 app/fundamental_analysis.py | 76 +++++++++++++++++++++++++++++++++++++
 requirements.txt            |  7 ++++
 6 files changed, 162 insertions(+)
 create mode 100644 .github/workflows/ci.yaml
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 app/fundamental_analysis.py
 create mode 100644 requirements.txt

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 0000000..e9ea24a
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,54 @@
+name: CI
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+jobs:
+  ci:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      attestations: write
+      id-token: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f99d414
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+/data/
+/logs/
+/venv/
+*.iml
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..73a4c3c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+# Use the Spark image as a base
+FROM spark:3.5.3-scala2.12-java17-ubuntu
+
+USER root
+
+RUN set -ex; \
+    apt-get update; \
+    apt-get install -y python3 python3-pip; \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY . /app
+RUN pip install -r requirements.txt
+
+
+VOLUME /app/data
+VOLUME /app/logs
+
+# Run the analysis script, redirecting output to a timestamped log file
+CMD ["sh", "-c", "python3 ./app/fundamental_analysis.py > /app/logs/fundamental_analysis_$(date +'%Y%m%d_%H%M%S').log 2>&1"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f1d0b6b
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+[![CI](https://github.com/unusualpseudo/stockbot/actions/workflows/ci.yaml/badge.svg)](https://github.com/unusualpseudo/stockbot/actions/workflows/ci.yaml)
\ No newline at end of file
diff --git a/app/fundamental_analysis.py b/app/fundamental_analysis.py
new file mode 100644
index 0000000..bc2c93b
--- /dev/null
+++ b/app/fundamental_analysis.py
@@ -0,0 +1,76 @@
+import yfinance as yf
+import pandas as pd
+from pyspark.sql import SparkSession
+
+# Function to collect financial data for a single stock
+def get_metrics(ticker):
+    stock = yf.Ticker(ticker)
+
+    # Get the most recent closing price
+    stock_price = stock.history(period="1mo")['Close'].iloc[-1]
+
+    # Get EPS (Earnings per Share)
+    eps = stock.info.get('trailingEps')
+
+    # Get P/E ratio
+    pe_ratio = None
+    if eps:
+        pe_ratio = round(stock_price / eps, 2)
+
+    # Get P/B ratio
+    book_value = stock.info.get('bookValue')
+    pb_ratio = None
+    if book_value:
+        pb_ratio = round(stock_price / book_value, 2)
+
+    # Get Return on Equity (ROE)
+    roe = stock.info.get('returnOnEquity')
+    if roe is not None:
+        roe = round(roe * 100, 2)  # Convert to percentage
+
+    # Return the collected data as a dictionary
+    return {
+        'Ticker': ticker,
+        'Stock_Price': stock_price,
+        'EPS': eps,
+        'PE_Ratio': pe_ratio,
+        'PB_Ratio': pb_ratio,
+        'ROE_Percentage': roe,
+    }
+
+
+# Function to collect data for multiple stocks
+def collect_metrics(tickers):
+    data = []
+    for ticker in tickers:
+        stock_data = get_metrics(ticker)
+        data.append(stock_data)
+
+    # Create a DataFrame to store the results
+    df = pd.DataFrame(data)
+    return df
+
+
+# Example: Collect data for multiple stock tickers
+tickers = ['AAPL', 'MSFT', 'GOOGL']  # Add more tickers as needed
+financial_data = collect_metrics(tickers)
+
+print(financial_data)
+
+# Initialize Spark session with Delta support
+spark = SparkSession.builder \
+    .appName("Stock Data to Delta Lake") \
+    .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.1") \
+    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
+    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
+    .getOrCreate()
+
+spark_df = spark.createDataFrame(financial_data)
+
+delta_table_path = "./data"  # resolves to the /app/data volume when run from /app (see Dockerfile)
+
+spark_df.write.format("delta").mode("overwrite").save(delta_table_path)
+
+delta_df = spark.read.format("delta").load(delta_table_path)
+
+delta_df.show()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0178332
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+yfinance==0.2.44
+pandas==2.2.3
+numpy==2.1.2
+matplotlib==3.9.2
+openai==1.52.0
+pyspark==3.5.3
+delta-spark==3.2.1
\ No newline at end of file
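
A minimal follow-up sketch of how the Delta table written by app/fundamental_analysis.py could be inspected from a separate Spark session. It assumes the container's /app/data volume is mounted at ./data on the host (the path is illustrative) and that the pinned pyspark and delta-spark packages from requirements.txt are installed locally:

    from pyspark.sql import SparkSession

    # Same Delta Lake session settings as app/fundamental_analysis.py.
    spark = (
        SparkSession.builder
        .appName("inspect-stockbot-delta")
        .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.1")
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
        .getOrCreate()
    )

    # "./data" is an assumption: whichever host directory was mounted to the container's /app/data volume.
    metrics = spark.read.format("delta").load("./data")
    metrics.select("Ticker", "Stock_Price", "PE_Ratio", "PB_Ratio", "ROE_Percentage").show()

Since the job writes with mode("overwrite"), each run replaces the current snapshot, while earlier versions remain reachable through the Delta transaction log under ./data/_delta_log until they are vacuumed.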