Merge pull request #6 from SermetPekin/AdamOpt
Adam optimization
SermetPekin authored Dec 11, 2024
2 parents fddfec3 + 3f6b6b9 commit ac6e609
Showing 15 changed files with 1,636 additions and 199 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,8 +9,11 @@ ignore*.*
ignore*/
--*/
--*.*
ig_*/
ig_*.*

data/
FashionMNIST/
raw/
*.pth

705 changes: 705 additions & 0 deletions Untitled.ipynb

Large diffs are not rendered by default.

265 changes: 265 additions & 0 deletions Untitled1.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions comparison.py
@@ -0,0 +1,7 @@
from micrograd.generics.micrograd_ import with_micrograd
from micrograd.generics.torch_ import with_torch


with_micrograd(100, 0.01, 5)

with_torch(100, 0.01, 5)
45 changes: 45 additions & 0 deletions example_adam.py
@@ -0,0 +1,45 @@
# Example: Training a simple MLP
import random
from typing import List

from micrograd.engine import Value
from micrograd.nn import MLP
import time
from micrograd.adam import Adam

SECONDS_TO_WAIT = 0 # 0.1

# Create random data
inputs = [[random.uniform(-1, 1) for _ in range(3)] for _ in range(10)]
targets = [random.uniform(-1, 1) for _ in range(10)]

# Define the MLP
mlp: MLP = MLP(3, [4, 4, 1]) # 3 inputs, 2 hidden layers with 4 neurons each, 1 output
optimizer = Adam(mlp.parameters(), lr=0.01)


# Training loop
learning_rate = 0.01

for epoch in range(100):

    # Forward pass
    predictions: List[Value] = [mlp(x) for x in inputs]
    # Mean Squared Error
    loss: Value = sum((pred - target) ** 2 for pred, target in zip(predictions, targets))
    assert isinstance(loss, Value), "loss is not a Value"

    # Zero gradients
    # for p in mlp.parameters():
    #     p.grad = 0
    optimizer.zero_grad()  # Adam optimizer instead of SGD
    # Backward pass
    loss.backward()

    optimizer.step()

    # Gradient descent step (manual SGD update, replaced by Adam)
    # for p in mlp.parameters():
    #     p.data -= learning_rate * p.grad

    print(f"Epoch {epoch}, Loss: {loss.data:0.03f}")
43 changes: 43 additions & 0 deletions main_adam.py
@@ -0,0 +1,43 @@
from micrograd import Value
from micrograd import MLP
import random
from micrograd.adam import Adam

# Simple training data (XOR example)
inputs = [
[Value(0.0), Value(0.0)],
[Value(0.0), Value(1.0)],
[Value(1.0), Value(0.0)],
[Value(1.0), Value(1.0)]
]
targets = [Value(0.0), Value(1.0), Value(1.0), Value(0.0)]

# Define the model
model = MLP(2, [4, 1]) # 2 input neurons, 1 hidden layer with 4 neurons, 1 output neuron

# Define the optimizer
optimizer = Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Forward pass
    total_loss = Value(0.0)
    for x, y in zip(inputs, targets):
        pred = model(x)[0]  # Assume model returns a list of outputs
        loss = (pred - y).pow(2)
        total_loss += loss

    # Backward pass
    optimizer.zero_grad()
    total_loss.backward()

    # Update weights
    optimizer.step()

    # Print loss every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss.data:.4f}')

# Test the model
for x in inputs:
    print(f'Input: {x[0].data}, {x[1].data} -> Prediction: {model(x)[0].data:.4f}')
3 changes: 2 additions & 1 deletion micrograd/__init__.py
@@ -2,7 +2,7 @@
from micrograd.nn import MLP, Neuron, Trainer, Optimizer, OptimizerForComparison, TrainerForComparison
from micrograd.graph import draw_dot
from micrograd.activation_functions import Activation

from micrograd.data import iris_data
__all__ = [
    "Value",
    "draw_dot",
@@ -11,4 +11,5 @@
    "Trainer",
    "Optimizer",
    "Activation",
    "iris_data"
]
39 changes: 39 additions & 0 deletions micrograd/adam.py
@@ -0,0 +1,39 @@
import math

class Adam:
    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.params = params  # List of parameters (Value objects)
        self.lr = lr          # Learning rate
        self.beta1 = beta1    # Decay rate for first moment estimates
        self.beta2 = beta2    # Decay rate for second moment estimates
        self.eps = eps        # Small value to prevent division by zero
        self.t = 0            # Time step

        # Initialize first and second moment vectors (m and v)
        self.m = {p: 0 for p in params}
        self.v = {p: 0 for p in params}

    def step(self):
        self.t += 1
        for p in self.params:
            if p.grad is None:
                continue

            # Update biased first moment estimate (m)
            self.m[p] = self.beta1 * self.m[p] + (1 - self.beta1) * p.grad

            # Update biased second raw moment estimate (v)
            self.v[p] = self.beta2 * self.v[p] + (1 - self.beta2) * (p.grad ** 2)

            # Bias correction for first moment estimate
            m_hat = self.m[p] / (1 - self.beta1 ** self.t)

            # Bias correction for second moment estimate
            v_hat = self.v[p] / (1 - self.beta2 ** self.t)

            # Update the parameters
            p.data -= self.lr * m_hat / (math.sqrt(v_hat) + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0
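
For reference, the update performed in step() above is the standard Adam rule (Kingma & Ba, 2015). Writing g_t for a parameter's gradient, \alpha for lr, \beta_1, \beta_2 for beta1, beta2, \epsilon for eps and t for the time step, each call to step() computes, per parameter:

m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
\hat{m}_t = m_t / (1 - \beta_1^t)
\hat{v}_t = v_t / (1 - \beta_2^t)
\theta_t = \theta_{t-1} - \alpha \, \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)

Because m and v are initialized to zero, the (1 - \beta^t) bias-correction factors keep the first few updates from being shrunk toward zero.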
26 changes: 26 additions & 0 deletions micrograd/data.py
@@ -0,0 +1,26 @@

import pandas as pd


def iris_data():
    """
    Iris data.
    Fisher, R. (1936). Iris [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C56C76.
    Below we load the data from a popular GitHub gist that serves it as CSV;
    the scikit-learn datasets module could be used instead.
    """
    def fnc(d: str):
        dict_ = {
            'Setosa': 0,
            'Versicolor': 1,
            'Virginica': 2,
        }
        return dict_.get(d, d)

    # Iris data
    url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
    df = pd.read_csv(url)
    df['variety'] = df['variety'].apply(fnc)
    return df
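
A minimal usage sketch, added here for illustration only (it is not part of the diff): the returned DataFrame can be split into features and integer labels the same way process_data_for_torch_and_micrograd in micrograd/generics/generic_base_.py consumes it. It assumes pandas is installed and the gist URL is reachable.

# Illustrative only: split the frame returned by iris_data() into features and labels
from micrograd.data import iris_data

df = iris_data()
X = df.drop('variety', axis=1).values  # 4 numeric iris measurements per row
y = df['variety'].values               # 'variety' mapped to 0/1/2 by fnc()
print(X.shape, y.shape)                # expected: (150, 4) (150,)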
Empty file added micrograd/generics/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions micrograd/generics/generic_base_.py
@@ -0,0 +1,85 @@
from typing import Tuple
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

from sklearn.datasets import load_iris
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler

from micrograd.data import iris_data
from micrograd.adam import Adam


def iris_data_xy():
    data = load_iris()
    X = data.data
    y = data.target

    # Standardize the dataset
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Convert to PyTorch tensors
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    return X_tensor, y_tensor


def process_data_for_torch_and_micrograd(df: pd.DataFrame):
    X = df.drop('variety', axis=1)
    y = df['variety']
    X = X.values
    y = y.values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    X_train = torch.FloatTensor(X_train)
    X_test = torch.FloatTensor(X_test)
    y_train = torch.LongTensor(y_train)
    y_test = torch.LongTensor(y_test)
    return X_train, X_test, y_train, y_test


def get_iris_data_split() -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    df = iris_data()
    return process_data_for_torch_and_micrograd(df)


class SPNeuralNetworkImplBase(ABC):

    def __init__(self):
        ...

    @staticmethod
    def load_data():
        ...

    @abstractmethod
    def train(self):
        ...

    @abstractmethod
    def eval(self):
        ...

    @abstractmethod
    def show(self):
        ...
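
The concrete implementations behind with_micrograd and with_torch live in micrograd/generics/micrograd_.py and torch_.py, which are not rendered in this view. Purely to illustrate the contract defined by SPNeuralNetworkImplBase, a hypothetical subclass might look like the sketch below; the class name and training details are assumptions, not taken from this commit.

# Hypothetical sketch only -- not the implementation shipped in this commit.
class IrisSketchImpl(SPNeuralNetworkImplBase):

    @staticmethod
    def load_data():
        return get_iris_data_split()

    def train(self):
        self.X_train, self.X_test, self.y_train, self.y_test = self.load_data()
        # ... fit a model here (e.g. a micrograd MLP with Adam, or a torch nn.Module) ...

    def eval(self):
        print("test set:", self.X_test.shape, self.y_test.shape)

    def show(self):
        print(self.__class__.__name__)


# impl = IrisSketchImpl(); impl.train(); impl.eval(); impl.show()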

