Commit
* new version * forced direct agents to save * implemented LR scheduler for transformer
Showing 5 changed files with 302 additions and 1,140 deletions.
@@ -0,0 +1,39 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_utils/40_ml_utils.ipynb.

# %% auto 0
__all__ = ['LRSchedulerPerStep']

# %% ../nbs/00_utils/40_ml_utils.ipynb 3
from typing import List, Tuple, Literal
import torch

# %% ../nbs/00_utils/40_ml_utils.ipynb 4
class LRSchedulerPerStep():
    """
    Learning rate scheduler from the Attention Is All You Need paper (https://arxiv.org/abs/1706.03762).
    One adjustment: the base LR is a tunable parameter rather than being set automatically from the model dimension.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,  # Optimizer to adjust the learning rate for
                 base_learning_rate: float = 0.0001,
                 warmup: int = 4000):

        # Ensure optimizer is a PyTorch optimizer
        if not isinstance(optimizer, torch.optim.Optimizer):
            raise ValueError('Optimizer must be a PyTorch optimizer')

        self.optimizer = optimizer
        self.basic = base_learning_rate
        self.warm = warmup**-1.5
        self.scaling_factor = 1 / warmup**-0.5  # ensures that the peak relative to the base LR is always 1

        self.step_num = 0
        self.step()

    def step(self):
        self.step_num += 1
        lr = self.basic * self.scaling_factor * min(self.step_num**-0.5, self.step_num * self.warm)

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
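As a point of reference, here is a minimal usage sketch for LRSchedulerPerStep (not part of this commit; the toy model, dummy loss, and Adam optimizer are placeholders chosen for illustration). On every step() call the scheduler writes lr = base_learning_rate * warmup**0.5 * min(step**-0.5, step * warmup**-1.5) into each parameter group, so the rate ramps up linearly for warmup steps, peaks at exactly base_learning_rate when step == warmup, and then decays as step**-0.5.

import torch

model = torch.nn.Linear(16, 16)                     # placeholder model
optimizer = torch.optim.Adam(model.parameters())    # any torch.optim.Optimizer works
scheduler = LRSchedulerPerStep(optimizer, base_learning_rate=1e-4, warmup=4000)

for step in range(10):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 16)).pow(2).mean()  # dummy loss just to drive the loop
    loss.backward()
    optimizer.step()
    scheduler.step()                                # call once per optimizer update
    print(step, optimizer.param_groups[0]['lr'])    # LR grows linearly during warmup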
@@ -0,0 +1,116 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ML utils\n",
    "\n",
    "> Some helper functions for machine learning tasks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| default_exp ml_utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "from nbdev.showdoc import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "\n",
    "from typing import List, Tuple, Literal\n",
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| export\n",
    "\n",
    "class LRSchedulerPerStep():\n",
    "    \"\"\"\n",
    "    Learning rate scheduler from the Attention Is All You Need paper (https://arxiv.org/abs/1706.03762).\n",
    "    One adjustment: the base LR is a tunable parameter rather than being set automatically from the model dimension.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self,\n",
    "                 optimizer: torch.optim.Optimizer,  # Optimizer to adjust the learning rate for\n",
    "                 base_learning_rate: float = 0.0001,\n",
    "                 warmup: int = 4000):\n",
    "\n",
    "        # Ensure optimizer is a PyTorch optimizer\n",
    "        if not isinstance(optimizer, torch.optim.Optimizer):\n",
    "            raise ValueError('Optimizer must be a PyTorch optimizer')\n",
    "\n",
    "        self.optimizer = optimizer\n",
    "        self.basic = base_learning_rate\n",
    "        self.warm = warmup**-1.5\n",
    "        self.scaling_factor = 1 / warmup**-0.5  # ensures that the peak relative to the base LR is always 1\n",
    "\n",
    "        self.step_num = 0\n",
    "        self.step()\n",
    "\n",
    "    def step(self):\n",
    "        self.step_num += 1\n",
    "        lr = self.basic * self.scaling_factor * min(self.step_num**-0.5, self.step_num * self.warm)\n",
    "\n",
    "        for param_group in self.optimizer.param_groups:\n",
    "            param_group['lr'] = lr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#| hide\n",
    "import nbdev; nbdev.nbdev_export()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}