forked from karpathy/micrograd
Commit
Merge pull request #6 from SermetPekin/AdamOpt
Adam optimization
Showing 15 changed files with 1,636 additions and 199 deletions.
.gitignore (modified):
@@ -9,8 +9,11 @@ ignore*.*
ignore*/
--*/
--*.*
ig_*/
ig_*.*

data/
FashionMNIST/
raw/
*.pth
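The new entries keep dataset downloads (the data/, FashionMNIST/, and raw/ directories, presumably produced by a torchvision FashionMNIST download elsewhere in this commit) and saved model weights out of version control; *.pth is the conventional extension for torch.save() checkpoints.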
Two large diffs are not rendered by default.
New file (7 lines), a driver script that runs the new micrograd and torch training helpers:
@@ -0,0 +1,7 @@
from micrograd.generics.micrograd_ import with_micrograd
from micrograd.generics.torch_ import with_torch


with_micrograd(100, 0.01, 5)

with_torch(100, 0.01, 5)
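The with_micrograd and with_torch helpers live in the two large diffs the page does not render, so their signatures are not visible here; from the call sites and the examples below, the arguments plausibly mean (epochs, learning_rate, print_every). A rough sketch of what the micrograd side might look like under that assumption (illustrative only, not the fork's actual code):

import random

from micrograd.nn import MLP
from micrograd.adam import Adam


def with_micrograd(epochs: int, lr: float, print_every: int) -> None:
    # Toy regression data, mirroring the MLP example later in this commit
    xs = [[random.uniform(-1, 1) for _ in range(3)] for _ in range(10)]
    ys = [random.uniform(-1, 1) for _ in range(10)]
    model = MLP(3, [4, 4, 1])
    opt = Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        loss = sum((model(x) - y) ** 2 for x, y in zip(xs, ys))
        opt.zero_grad()
        loss.backward()
        opt.step()
        if epoch % print_every == 0:
            print(f"epoch {epoch}: loss {loss.data:.4f}")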
New file (45 lines), an MLP training example using the new Adam optimizer (apparently example_adam.py, which the last file in this commit imports from):
@@ -0,0 +1,45 @@
# Example: Training a simple MLP
import random
from typing import List

from micrograd.engine import Value
from micrograd.nn import MLP
from micrograd.adam import Adam

# Create random data
inputs = [[random.uniform(-1, 1) for _ in range(3)] for _ in range(10)]
targets = [random.uniform(-1, 1) for _ in range(10)]

# Define the MLP
mlp: MLP = MLP(3, [4, 4, 1])  # 3 inputs, 2 hidden layers with 4 neurons each, 1 output
optimizer = Adam(mlp.parameters(), lr=0.01)

# Training loop
learning_rate = 0.01  # used only by the commented-out manual SGD step below

for epoch in range(100):
    # Forward pass
    predictions: List[Value] = [mlp(x) for x in inputs]

    # Mean squared error
    loss: Value = sum((pred - target) ** 2 for pred, target in zip(predictions, targets))
    assert isinstance(loss, Value), "loss is not a Value"

    # Zero gradients (the Adam optimizer replaces the manual loop):
    # for p in mlp.parameters():
    #     p.grad = 0
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    optimizer.step()
    # Manual gradient-descent step that optimizer.step() replaces:
    # for p in mlp.parameters():
    #     p.data -= learning_rate * p.grad

    print(f"Epoch {epoch}, Loss: {loss.data:0.03f}")
New file (43 lines), an XOR training example:
@@ -0,0 +1,43 @@
from micrograd.engine import Value
from micrograd.nn import MLP
from micrograd.adam import Adam

# Simple training data (XOR example)
inputs = [
    [Value(0.0), Value(0.0)],
    [Value(0.0), Value(1.0)],
    [Value(1.0), Value(0.0)],
    [Value(1.0), Value(1.0)],
]
targets = [Value(0.0), Value(1.0), Value(1.0), Value(0.0)]

# Define the model
model = MLP(2, [4, 1])  # 2 input neurons, 1 hidden layer with 4 neurons, 1 output neuron

# Define the optimizer
optimizer = Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Forward pass
    total_loss = Value(0.0)
    for x, y in zip(inputs, targets):
        pred = model(x)  # an MLP with a single output returns a bare Value
        loss = (pred - y) ** 2
        total_loss += loss

    # Backward pass
    optimizer.zero_grad()
    total_loss.backward()

    # Update weights
    optimizer.step()

    # Print loss every 100 epochs
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss.data:.4f}')

# Test the model
for x in inputs:
    print(f'Input: {x[0].data}, {x[1].data} -> Prediction: {model(x).data:.4f}')
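Since XOR is not linearly separable, the hidden layer does the real work here; if training converges, the four printed predictions should move toward the 0, 1, 1, 0 targets.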
New file (39 lines), the Adam optimizer itself (micrograd/adam.py, judging by the imports in the examples above):
@@ -0,0 +1,39 @@
import math


class Adam:
    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.params = params  # list of parameters (Value objects)
        self.lr = lr          # learning rate
        self.beta1 = beta1    # decay rate for first-moment estimates
        self.beta2 = beta2    # decay rate for second-moment estimates
        self.eps = eps        # small value to prevent division by zero
        self.t = 0            # time step

        # Initialize first and second moment accumulators (m and v)
        self.m = {p: 0 for p in params}
        self.v = {p: 0 for p in params}

    def step(self):
        self.t += 1
        for p in self.params:
            if p.grad is None:
                continue

            # Update biased first moment estimate (m)
            self.m[p] = self.beta1 * self.m[p] + (1 - self.beta1) * p.grad

            # Update biased second raw moment estimate (v)
            self.v[p] = self.beta2 * self.v[p] + (1 - self.beta2) * (p.grad ** 2)

            # Bias-correct both moment estimates
            m_hat = self.m[p] / (1 - self.beta1 ** self.t)
            v_hat = self.v[p] / (1 - self.beta2 ** self.t)

            # Update the parameter
            p.data -= self.lr * m_hat / (math.sqrt(v_hat) + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0
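For reference, step() applies the standard Adam update of Kingma & Ba (2015); with g_t the parameter's gradient and alpha the learning rate:

\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1-\beta_1)\, g_t, \qquad
v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2, \\
\hat{m}_t &= \frac{m_t}{1-\beta_1^t}, \qquad
\hat{v}_t = \frac{v_t}{1-\beta_2^t}, \\
\theta_t &= \theta_{t-1} - \alpha\, \frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon},
\end{aligned}

which is exactly the arithmetic in the loop above, with m and v stored per parameter and t the shared step counter.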
New file (26 lines), an iris data loader (micrograd/data.py, judging by the import in the last file below):
@@ -0,0 +1,26 @@
import pandas as pd


def iris_data():
    """
    Iris data.
    Fisher, R. (1936). Iris [Dataset]. UCI Machine Learning Repository.
    https://doi.org/10.24432/C56C76
    Below we use a popular GitHub gist that includes the data; the
    scikit-learn datasets module could be used instead.
    """
    def fnc(d: str):
        dict_ = {
            'Setosa': 0,
            'Versicolor': 1,
            'Virginica': 2,
        }
        return dict_.get(d, d)

    url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
    df = pd.read_csv(url)
    df['variety'] = df['variety'].apply(fnc)
    return df
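A quick sanity check of the loader (assuming pandas is installed and the gist URL is reachable; this snippet is not part of the commit):

from micrograd.data import iris_data

df = iris_data()
print(df.shape)                # (150, 5): four measurements plus 'variety'
print(df['variety'].unique())  # [0 1 2] after the Setosa/Versicolor/Virginica mapping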
Empty file.
New file (85 lines), iris data preparation helpers and an abstract network base class (possibly micrograd/generics/torch_.py; the page truncates this diff partway through):
@@ -0,0 +1,85 @@ | ||
from sklearn.datasets import load_iris | ||
import pandas as pd | ||
from typing import Tuple | ||
from sklearn.model_selection import train_test_split | ||
import pandas as pd | ||
from abc import ABC, abstractmethod | ||
from micrograd.data import iris_data | ||
|
||
|
||
import torch | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
from example_adam import epoch | ||
from micrograd.adam import Adam | ||
import torch.nn as nn | ||
import torch.optim as optim | ||
from sklearn.metrics import roc_auc_score | ||
from sklearn.model_selection import KFold | ||
from sklearn.preprocessing import StandardScaler | ||
from sklearn.model_selection import KFold | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from torch.utils.data import DataLoader | ||
from torchvision import datasets | ||
from torchvision.transforms import ToTensor | ||
|
||
|
||
def iris_data_xy(): | ||
from sklearn.datasets import load_iris | ||
from sklearn.preprocessing import StandardScaler | ||
data = load_iris() | ||
X = data.data | ||
y = data.target | ||
|
||
# Standardize the dataset | ||
scaler = StandardScaler() | ||
X = scaler.fit_transform(X) | ||
|
||
# Convert to PyTorch tensors | ||
X_tensor = torch.tensor(X, dtype=torch.float32) | ||
y_tensor = torch.tensor(y, dtype=torch.long) | ||
|
||
return X_tensor, y_tensor | ||
def process_data_for_torch_and_micrograd(df: pd.DataFrame): | ||
X = df.drop('variety', axis=1) | ||
y = df['variety'] | ||
X = X.values | ||
y = y.values | ||
|
||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) | ||
X_train = torch.FloatTensor(X_train) | ||
X_test = torch.FloatTensor(X_test) | ||
y_train = torch.LongTensor(y_train) | ||
y_test = torch.LongTensor(y_test) | ||
return X_train, X_test, y_train, y_test | ||
|
||
|
||
def get_iris_data_split() -> Tuple['X_train', 'X_test', 'y_train', 'y_test']: | ||
df = iris_data() | ||
return process_data_for_torch_and_micrograd(df) | ||
|
||
|
||
|
||
|
||
class SPNeuralNetworkImplBase(ABC): | ||
|
||
def __init__(self): | ||
... | ||
@staticmethod | ||
def load_data(): | ||
... | ||
|
||
@abstractmethod | ||
def train(self): | ||
... | ||
|
||
@abstractmethod | ||
def eval(self): | ||
... | ||
|
||
@abstractmethod | ||
def show(self): | ||
... | ||
|
||
|
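The base class only fixes an interface; a concrete subclass (hypothetical, not in the commit; the class name and hyperparameters below are illustrative) might fill it in like this:

import torch
import torch.nn as nn


class IrisTorchNet(SPNeuralNetworkImplBase):
    """Illustrative subclass wiring the iris split into a small torch model."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 3))
        self.X_train, self.X_test, self.y_train, self.y_test = get_iris_data_split()

    def train(self):
        opt = torch.optim.Adam(self.model.parameters(), lr=0.01)
        loss_fn = nn.CrossEntropyLoss()
        for _ in range(100):
            opt.zero_grad()
            loss = loss_fn(self.model(self.X_train), self.y_train)
            loss.backward()
            opt.step()

    def eval(self):
        # Accuracy on the held-out split
        with torch.no_grad():
            preds = self.model(self.X_test).argmax(dim=1)
            return (preds == self.y_test).float().mean().item()

    def show(self):
        print(f"test accuracy: {self.eval():.3f}")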