forked from karpathy/micrograd
Commit
Merge pull request #6 from SermetPekin/AdamOpt
Adam optimization
Showing 15 changed files with 1,636 additions and 199 deletions.
.gitignore (modified):
@@ -9,8 +9,11 @@ ignore*.*
ignore*/
--*/
--*.*
ig_*/
ig_*.*

data/
FashionMNIST/
raw/
*.pth
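The new entries keep dataset downloads (the data/, FashionMNIST/, and raw/ directories, presumably produced by a torchvision FashionMNIST download elsewhere in this commit) and saved model weights out of version control; *.pth is the conventional extension for torch.save() checkpoints.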
Two large diffs are not rendered by default.
New file (7 lines), a driver script that runs the new micrograd and torch training helpers:
@@ -0,0 +1,7 @@
from micrograd.generics.micrograd_ import with_micrograd
from micrograd.generics.torch_ import with_torch


with_micrograd(100, 0.01, 5)

with_torch(100, 0.01, 5)
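The with_micrograd and with_torch helpers live in the two large diffs the page does not render, so their signatures are not visible here; from the call sites and the examples below, the arguments plausibly mean (epochs, learning_rate, print_every). A rough sketch of what the micrograd side might look like under that assumption (illustrative only, not the fork's actual code):

import random

from micrograd.nn import MLP
from micrograd.adam import Adam


def with_micrograd(epochs: int, lr: float, print_every: int) -> None:
    # Toy regression data, mirroring the MLP example later in this commit
    xs = [[random.uniform(-1, 1) for _ in range(3)] for _ in range(10)]
    ys = [random.uniform(-1, 1) for _ in range(10)]
    model = MLP(3, [4, 4, 1])
    opt = Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        loss = sum((model(x) - y) ** 2 for x, y in zip(xs, ys))
        opt.zero_grad()
        loss.backward()
        opt.step()
        if epoch % print_every == 0:
            print(f"epoch {epoch}: loss {loss.data:.4f}")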
New file (45 lines), an MLP training example using the new Adam optimizer (apparently example_adam.py, which the last file in this commit imports from):
@@ -0,0 +1,45 @@
# Example: Training a simple MLP
import random
from typing import List

from micrograd.engine import Value
from micrograd.nn import MLP
from micrograd.adam import Adam

# Create random data
inputs = [[random.uniform(-1, 1) for _ in range(3)] for _ in range(10)]
targets = [random.uniform(-1, 1) for _ in range(10)]

# Define the MLP
mlp: MLP = MLP(3, [4, 4, 1])  # 3 inputs, 2 hidden layers with 4 neurons each, 1 output
optimizer = Adam(mlp.parameters(), lr=0.01)

# Training loop
learning_rate = 0.01  # used only by the commented-out manual SGD step below

for epoch in range(100):
    # Forward pass
    predictions: List[Value] = [mlp(x) for x in inputs]

    # Mean squared error
    loss: Value = sum((pred - target) ** 2 for pred, target in zip(predictions, targets))
    assert isinstance(loss, Value), "loss is not a Value"

    # Zero gradients (the Adam optimizer replaces the manual loop):
    # for p in mlp.parameters():
    #     p.grad = 0
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    optimizer.step()
    # Manual gradient-descent step that optimizer.step() replaces:
    # for p in mlp.parameters():
    #     p.data -= learning_rate * p.grad

    print(f"Epoch {epoch}, Loss: {loss.data:0.03f}")
New file (43 lines), an XOR training example:
@@ -0,0 +1,43 @@
from micrograd.engine import Value
from micrograd.nn import MLP
from micrograd.adam import Adam

# Simple training data (XOR example)
inputs = [
    [Value(0.0), Value(0.0)],
    [Value(0.0), Value(1.0)],
    [Value(1.0), Value(0.0)],
    [Value(1.0), Value(1.0)],
]
targets = [Value(0.0), Value(1.0), Value(1.0), Value(0.0)]

# Define the model
model = MLP(2, [4, 1])  # 2 input neurons, 1 hidden layer with 4 neurons, 1 output neuron

# Define the optimizer
optimizer = Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Forward pass
    total_loss = Value(0.0)
    for x, y in zip(inputs, targets):
        pred = model(x)  # an MLP with a single output returns a bare Value
        loss = (pred - y) ** 2
        total_loss += loss

    # Backward pass
    optimizer.zero_grad()
    total_loss.backward()

    # Update weights
    optimizer.step()

    # Print loss every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss.data:.4f}')

# Test the model
for x in inputs:
    print(f'Input: {x[0].data}, {x[1].data} -> Prediction: {model(x).data:.4f}')
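Since XOR is not linearly separable, the hidden layer does the real work here; if training converges, the four printed predictions should move toward the 0, 1, 1, 0 targets.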
New file (39 lines), the Adam optimizer itself (micrograd/adam.py, judging by the imports in the examples above):
@@ -0,0 +1,39 @@
import math


class Adam:
    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.params = params  # list of parameters (Value objects)
        self.lr = lr          # learning rate
        self.beta1 = beta1    # decay rate for first-moment estimates
        self.beta2 = beta2    # decay rate for second-moment estimates
        self.eps = eps        # small value to prevent division by zero
        self.t = 0            # time step

        # Initialize first and second moment accumulators (m and v)
        self.m = {p: 0 for p in params}
        self.v = {p: 0 for p in params}

    def step(self):
        self.t += 1
        for p in self.params:
            if p.grad is None:
                continue

            # Update biased first moment estimate (m)
            self.m[p] = self.beta1 * self.m[p] + (1 - self.beta1) * p.grad

            # Update biased second raw moment estimate (v)
            self.v[p] = self.beta2 * self.v[p] + (1 - self.beta2) * (p.grad ** 2)

            # Bias-correct both moment estimates
            m_hat = self.m[p] / (1 - self.beta1 ** self.t)
            v_hat = self.v[p] / (1 - self.beta2 ** self.t)

            # Update the parameter
            p.data -= self.lr * m_hat / (math.sqrt(v_hat) + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0
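For reference, step() applies the standard Adam update of Kingma & Ba (2015); with g_t the parameter's gradient and alpha the learning rate:

\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1-\beta_1)\, g_t, \qquad
v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2, \\
\hat{m}_t &= \frac{m_t}{1-\beta_1^t}, \qquad
\hat{v}_t = \frac{v_t}{1-\beta_2^t}, \\
\theta_t &= \theta_{t-1} - \alpha\, \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon},
\end{aligned}

which is exactly the arithmetic in the loop above, with m and v stored per parameter and t the shared step counter.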
New file (26 lines), an iris data loader (micrograd/data.py, judging by the import in the last file below):
@@ -0,0 +1,26 @@
import pandas as pd


def iris_data():
    """
    Iris data.
    Fisher, R. (1936). Iris [Dataset]. UCI Machine Learning Repository.
    https://doi.org/10.24432/C56C76
    Below we use a popular GitHub gist that includes the data; the
    scikit-learn datasets module could be used instead.
    """
    def fnc(d: str):
        dict_ = {
            'Setosa': 0,
            'Versicolor': 1,
            'Virginica': 2,
        }
        return dict_.get(d, d)

    url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
    df = pd.read_csv(url)
    df['variety'] = df['variety'].apply(fnc)
    return df
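A quick sanity check of the loader (assuming pandas is installed and the gist URL is reachable; this snippet is not part of the commit):

from micrograd.data import iris_data

df = iris_data()
print(df.shape)                # (150, 5): four measurements plus 'variety'
print(df['variety'].unique())  # [0 1 2] after the Setosa/Versicolor/Virginica mapping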
Empty file.
New file (85 lines), iris data preparation helpers and an abstract network base class (possibly micrograd/generics/torch_.py; the page truncates this diff partway through):
@@ -0,0 +1,85 @@ | ||
from sklearn.datasets import load_iris | ||
import pandas as pd | ||
from typing import Tuple | ||
from sklearn.model_selection import train_test_split | ||
import pandas as pd | ||
from abc import ABC, abstractmethod | ||
from micrograd.data import iris_data | ||
|
||
|
||
import torch | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
from example_adam import epoch | ||
from micrograd.adam import Adam | ||
import torch.nn as nn | ||
import torch.optim as optim | ||
from sklearn.metrics import roc_auc_score | ||
from sklearn.model_selection import KFold | ||
from sklearn.preprocessing import StandardScaler | ||
from sklearn.model_selection import KFold | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from torch.utils.data import DataLoader | ||
from torchvision import datasets | ||
from torchvision.transforms import ToTensor | ||
|
||
|
||
def iris_data_xy(): | ||
from sklearn.datasets import load_iris | ||
from sklearn.preprocessing import StandardScaler | ||
data = load_iris() | ||
X = data.data | ||
y = data.target | ||
|
||
# Standardize the dataset | ||
scaler = StandardScaler() | ||
X = scaler.fit_transform(X) | ||
|
||
# Convert to PyTorch tensors | ||
X_tensor = torch.tensor(X, dtype=torch.float32) | ||
y_tensor = torch.tensor(y, dtype=torch.long) | ||
|
||
return X_tensor, y_tensor | ||
def process_data_for_torch_and_micrograd(df: pd.DataFrame): | ||
X = df.drop('variety', axis=1) | ||
y = df['variety'] | ||
X = X.values | ||
y = y.values | ||
|
||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) | ||
X_train = torch.FloatTensor(X_train) | ||
X_test = torch.FloatTensor(X_test) | ||
y_train = torch.LongTensor(y_train) | ||
y_test = torch.LongTensor(y_test) | ||
return X_train, X_test, y_train, y_test | ||
|
||
|
||
def get_iris_data_split() -> Tuple['X_train', 'X_test', 'y_train', 'y_test']: | ||
df = iris_data() | ||
return process_data_for_torch_and_micrograd(df) | ||
|
||
|
||
|
||
|
||
class SPNeuralNetworkImplBase(ABC): | ||
|
||
def __init__(self): | ||
... | ||
@staticmethod | ||
def load_data(): | ||
... | ||
|
||
@abstractmethod | ||
def train(self): | ||
... | ||
|
||
@abstractmethod | ||
def eval(self): | ||
... | ||
|
||
@abstractmethod | ||
def show(self): | ||
... | ||
|
||
|
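The base class only fixes an interface; a concrete subclass (hypothetical, not in the commit; the class name and hyperparameters below are illustrative) might fill it in like this:

import torch
import torch.nn as nn


class IrisTorchNet(SPNeuralNetworkImplBase):
    """Illustrative subclass wiring the iris split into a small torch model."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 3))
        self.X_train, self.X_test, self.y_train, self.y_test = get_iris_data_split()

    def train(self):
        opt = torch.optim.Adam(self.model.parameters(), lr=0.01)
        loss_fn = nn.CrossEntropyLoss()
        for _ in range(100):
            opt.zero_grad()
            loss = loss_fn(self.model(self.X_train), self.y_train)
            loss.backward()
            opt.step()

    def eval(self):
        # Accuracy on the held-out split
        with torch.no_grad():
            preds = self.model(self.X_test).argmax(dim=1)
            return (preds == self.y_test).float().mean().item()

    def show(self):
        print(f"test accuracy: {self.eval():.3f}")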