From fb0b29ae551246d6d3347d2026c30becbe16a92e Mon Sep 17 00:00:00 2001
From: Sermet Pekin
Date: Thu, 5 Dec 2024 14:26:23 +0300
Subject: [PATCH] ensemble

---
 example_trainer2.py   | 111 ++++++++++++++++++++++++++++++++++++++++++
 micrograd/__init__.py |   2 +-
 micrograd/nn.py       |  94 ++++++++++++++++++++++++++++++++++-
 test/test_trainer2.py |   4 +-
 4 files changed, 206 insertions(+), 5 deletions(-)
 create mode 100644 example_trainer2.py

diff --git a/example_trainer2.py b/example_trainer2.py
new file mode 100644
index 00000000..91e6f584
--- /dev/null
+++ b/example_trainer2.py
@@ -0,0 +1,111 @@
+import pytest
+import torch
+from micrograd import Value, MLP, Optimizer, Trainer, OptimizerForComparison, TrainerForComparison
+
+
+class TorchMLP(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.fc1 = torch.nn.Linear(2, 3)
+        self.fc2 = torch.nn.Linear(3, 1)
+
+    def forward(self, x):
+        x = self.fc1(x)
+        x = torch.relu(x)
+        x = self.fc2(x)
+        return x
+
+
+# Loss function for micrograd
+def mean_squared_error(predicted: Value, target: Value) -> Value:
+    return (predicted - target) ** 2
+
+
+def initialize_weights_micrograd(model):
+    for layer in model.layers:
+        for neuron in layer.neurons:
+            for weight in neuron.weights:
+                weight.data = 0.5  # Example fixed value
+            neuron.bias.data = 0.1  # Example fixed value
+
+
+def initialize_weights_torch(model):
+    with torch.no_grad():
+        model.fc1.weight.fill_(0.5)
+        model.fc1.bias.fill_(0.1)
+        model.fc2.weight.fill_(0.5)
+        model.fc2.bias.fill_(0.1)
+
+
+def data1():
+    inputs = [
+        [Value(1.0), Value(2.0)],
+        [Value(2.0), Value(3.0)],
+        [Value(3.0), Value(4.0)],
+        [Value(4.0), Value(5.0)]
+    ]
+    targets = [Value(9.0), Value(14.0), Value(19.0), Value(24.0)]
+
+    torch_inputs = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
+    torch_targets = torch.tensor([[9.0], [14.0], [19.0], [24.0]])
+    return inputs, targets, torch_inputs, torch_targets
+
+
+def data2():
+    inputs = [[Value(i), Value(i + 1)] for i in range(1, 21)]
+    targets = [Value(2 * i + 3 * (i + 1) + 1) for i in range(1, 21)]
+    torch_inputs = torch.tensor([[i, i + 1] for i in range(1, 21)], dtype=torch.float32)
+    torch_targets = torch.tensor([[2 * i + 3 * (i + 1) + 1] for i in range(1, 21)], dtype=torch.float32)
+    return inputs, targets, torch_inputs, torch_targets
+
+
+# @pytest.mark.skipif(True, reason='TODO')
+def compare_micrograd_vs_torch():
+    # Dataset
+    inputs, targets, torch_inputs, torch_targets = data1()
+
+    # Micrograd Model
+    micrograd_model = MLP(input_size=2, layer_sizes=[3, 1])
+    micrograd_optimizer = OptimizerForComparison()
+    micrograd_trainer = TrainerForComparison(
+        model=micrograd_model,
+        loss_fn=mean_squared_error,
+        optimizer=micrograd_optimizer,
+        num_clones=5
+    )
+
+    # initialize_weights_micrograd(micrograd_model)
+
+    EPOCHS = int(10000)
+    # Train Micrograd Model
+    micrograd_trainer.train(inputs, targets, epochs=EPOCHS, learning_rate=0.01)
+
+    # PyTorch Model
+    torch_model = TorchMLP()
+    # initialize_weights_torch(torch_model)
+    torch_optimizer = torch.optim.SGD(torch_model.parameters(), lr=0.01)
+    loss_fn = torch.nn.MSELoss()
+
+    # Train PyTorch Model
+    for epoch in range(EPOCHS):
+        torch_optimizer.zero_grad()
+        predictions = torch_model(torch_inputs)
+        loss = loss_fn(predictions, torch_targets)
+        loss.backward()
+        torch_optimizer.step()
+
+    # Compare Predictions
+    micrograd_test_input = [Value(5.0), Value(6.0)]
+    micrograd_prediction = micrograd_model(micrograd_test_input).data
+
+    torch_test_input = torch.tensor([[5.0, 6.0]])
+    torch_prediction = torch_model(torch_test_input).item()
+
+    msg = f'micrograd_prediction: {micrograd_prediction} torch_prediction : {torch_prediction}'
+    print(msg)
+    # Assert that predictions are close
+    # assert pytest.approx(micrograd_prediction,
+    #                      rel=1e-2) == torch_prediction, f'micrograd_prediction: {micrograd_prediction} torch_prediction : {torch_prediction}'
+
+
+compare_micrograd_vs_torch()
diff --git a/micrograd/__init__.py b/micrograd/__init__.py
index 93d7e22d..fcc1e2d4 100644
--- a/micrograd/__init__.py
+++ b/micrograd/__init__.py
@@ -1,5 +1,5 @@
 from micrograd.engine import Value
-from micrograd.nn import MLP, Neuron, Trainer, Optimizer, OptimizerForComparison
+from micrograd.nn import MLP, Neuron, Trainer, Optimizer, OptimizerForComparison, TrainerForComparison
 from micrograd.graph import draw_dot
 from micrograd.activation_functions import Activation
 
diff --git a/micrograd/nn.py b/micrograd/nn.py
index 4d6b5e93..f9ed80c8 100644
--- a/micrograd/nn.py
+++ b/micrograd/nn.py
@@ -1,5 +1,6 @@
 import random
 from typing import List, Callable, Optional
+from abc import ABC, abstractmethod
 
 from micrograd.engine import Value, Weight, Bias
 
@@ -110,8 +111,9 @@ def parameters(self) -> List[Value]:
     def __repr__(self) -> str:
         return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"
 
-
-from abc import ABC, abstractmethod
+    def clone(self):
+        import copy
+        return copy.deepcopy(self)
 
 
 class OptimizerAbstract(ABC):
@@ -134,6 +136,9 @@ def __init__(self, parameters: List[Value] = (), learning_rate: float = 0.01, mo
         self.weight_decay = weight_decay
         self.velocities = {}  # {id(param): 0.0 for param in parameters}  # Momentum storage
 
+    def __call__(self, *args, **kw) -> 'OptimizerForComparison':
+        return self
+
     def step(self, parameters=(), learning_rate: float = None) -> None:
         if parameters:
             self.parameters = parameters
@@ -194,3 +199,88 @@ def train(
                 self.model.zero_grad()
 
             print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(inputs):.4f}")
+
+
+class TrainerForComparison:
+    def __init__(
+        self,
+        model: Module,
+        loss_fn: Callable[[Value, Value], Value],
+        optimizer: OptimizerAbstract,
+        num_clones: int = 1,
+        eval_interval: int = 200,
+    ):
+        self.models = [model.clone() for _ in range(num_clones)]
+        self.loss_fn = loss_fn
+        self.optimizers = [optimizer(model.parameters()) for model in
+                           self.models]  # [optimizer(model.parameters()) for model in self.models]
+        self.eval_interval = eval_interval
+        self.best_model = None
+
+    def train(
+        self,
+        inputs: List[List[Value]],
+        targets: List[Value],
+        epochs: int,
+        learning_rate: float,
+    ) -> None:
+        for epoch in range(epochs):
+            if epoch < self.eval_interval:
+                # Train all clones during the evaluation interval
+                for index, (model, optimizer) in enumerate(zip(self.models, self.optimizers)):
+                    model.number = index + 1
+                    self._train_one_epoch(index, model, optimizer, inputs, targets, learning_rate)
+            elif epoch == self.eval_interval:
+                # Evaluate clones and select the best one
+                self.best_model = self._evaluate_and_select_best(inputs, targets)
+                print(f"After {self.eval_interval} epochs, best model selected.")
+            else:
+                # Train only the best model
+                self._train_one_epoch(self.best_model.number, self.best_model, self.optimizers[0], inputs, targets, learning_rate)
+
+    def _train_one_epoch(
+        self,
+        index: int,
+        model: Module,
+        optimizer: OptimizerAbstract,
+        inputs: List[List[Value]],
+        targets: List[Value],
+        learning_rate: float,
+    ) -> None:
+        total_loss = 0
+        for input_data, target in zip(inputs, targets):
+            # Forward pass
+            predictions = model(input_data)
+            loss = self.loss_fn(predictions, target)
+            total_loss += loss.data
+
+            # Backward pass
+            loss.backward()
+
+            # Update parameters
+            optimizer.step(model.parameters(), learning_rate)
+
+            # Zero gradients for the next iteration
+            model.zero_grad()
+
+        print(f"Training Loss: {total_loss / len(inputs):.4f}")
+
+        # print(f"Epoch {epoch + 1}/{epochs}, Clone {index + 1}, Training Loss: {total_loss / len(inputs):.4f}")
+        print(f"Epoch Clone {index + 1}, Training Loss: {total_loss / len(inputs):.4f}")
+        # print(f"Clone {i + 1}, Validation Loss: {avg_loss:.4f}")
+
+    def _evaluate_and_select_best(self, inputs: List[List[Value]], targets: List[Value]) -> Module:
+        best_loss = float("inf")
+        best_model = None
+        for model in self.models:
+            total_loss = 0
+            for input_data, target in zip(inputs, targets):
+                predictions = model(input_data)
+                loss = self.loss_fn(predictions, target)
+                total_loss += loss.data
+            avg_loss = total_loss / len(inputs)
+            print(f"Model Loss: {avg_loss:.4f}")
+            if avg_loss < best_loss:
+                best_loss = avg_loss
+                best_model = model
+        return best_model
diff --git a/test/test_trainer2.py b/test/test_trainer2.py
index 08130066..8c156242 100644
--- a/test/test_trainer2.py
+++ b/test/test_trainer2.py
@@ -1,6 +1,6 @@
 import pytest
 import torch
-from micrograd import Value, MLP, Optimizer, Trainer, OptimizerForComparison
+from micrograd import Value, MLP, Optimizer, Trainer, OptimizerForComparison, TrainerForComparison
 
 
 class TorchMLP(torch.nn.Module):
@@ -67,7 +67,7 @@ def test_micrograd_vs_torch():
     # Micrograd Model
     micrograd_model = MLP(input_size=2, layer_sizes=[3, 1])
    micrograd_optimizer = OptimizerForComparison()
-    micrograd_trainer = Trainer(
+    micrograd_trainer = TrainerForComparison(
         model=micrograd_model,
         loss_fn=mean_squared_error,
         optimizer=micrograd_optimizer
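
Usage sketch (illustrative, not part of the patch): the ensemble flow above clones the model num_clones times, trains every clone for the first eval_interval epochs, then keeps only the clone with the lowest average training loss for the remaining epochs. A minimal driver, using only names exported by the patched micrograd package; the toy data and hyperparameters are made up:

    from micrograd import Value, MLP, OptimizerForComparison, TrainerForComparison

    def mse(pred: Value, target: Value) -> Value:
        # per-sample squared error, as in example_trainer2.py
        return (pred - target) ** 2

    # toy samples of y = 2*x1 + 3*x2 + 1
    xs = [[Value(1.0), Value(2.0)], [Value(2.0), Value(3.0)]]
    ys = [Value(9.0), Value(14.0)]

    model = MLP(input_size=2, layer_sizes=[3, 1])
    trainer = TrainerForComparison(
        model=model,
        loss_fn=mse,
        # OptimizerForComparison.__call__ returns self, so every clone
        # ends up sharing this single optimizer instance
        optimizer=OptimizerForComparison(),
        num_clones=3,        # train 3 clones during the evaluation window
        eval_interval=200,   # then continue with the best clone only
    )
    trainer.train(xs, ys, epochs=1000, learning_rate=0.01)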