
Commit

Merge pull request #25 from joshhan619/ltsm-stack
Baseline model implementation and unit tests: code looks good to me
ynchuang authored Dec 3, 2024
2 parents 97c0bd9 + 56787cf commit 94dbfc2
Showing 17 changed files with 1,692 additions and 8 deletions.
41 changes: 38 additions & 3 deletions ltsm/data_pipeline/data_pipeline.py
@@ -51,8 +51,6 @@ def run(self):
- Evaluating the model on test datasets and logging metrics.
"""
logging.info(self.args)

model = self.model_manager.create_model()

# Training settings
training_args = TrainingArguments(
@@ -75,6 +73,12 @@ def run(self):

train_dataset, eval_dataset, test_datasets, _ = get_datasets(self.args)
train_dataset, eval_dataset = HF_Dataset(train_dataset), HF_Dataset(eval_dataset)

if self.args.model == 'PatchTST' or self.args.model == 'DLinear':
# Set seq_len to the full input length (prompt sequence included), since PatchTST/DLinear derive their patch count from it
self.model_manager.args.seq_len = train_dataset[0]["input_data"].size()[0]

model = self.model_manager.create_model()

trainer = Trainer(
model=model,
@@ -140,14 +144,31 @@ def get_args():
parser.add_argument('--d_ff', type=int, default=512, help='dimension of fcn')
parser.add_argument('--dropout', type=float, default=0.2, help='dropout')
parser.add_argument('--enc_in', type=int, default=1, help='encoder input size')
parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
parser.add_argument('--c_out', type=int, default=862, help='output size')
parser.add_argument('--patch_size', type=int, default=16, help='patch size')
parser.add_argument('--pretrain', type=int, default=1, help='is pretrain')
parser.add_argument('--local_pretrain', type=str, default="None", help='local pretrain weight')
parser.add_argument('--freeze', type=int, default=1, help='is model weight frozen')
parser.add_argument('--model', type=str, default='model', help='model name, options:[LTSM, LTSM_WordPrompt, LTSM_Tokenizer, DLinear, PatchTST, Informer]')
parser.add_argument('--stride', type=int, default=8, help='stride')
parser.add_argument('--tmax', type=int, default=10, help='tmax')
parser.add_argument('--embed', type=str, default='timeF',
help='time features encoding, options:[timeF, fixed, learned]')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data')
parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
parser.add_argument('--factor', type=int, default=1, help='attn factor')
parser.add_argument('--distil', action='store_false',
help='whether to use distilling in encoder, using this argument means not using distilling',
default=True)
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
parser.add_argument('--embed_type', type=int, default=0, help='0: default 1: value embedding + temporal embedding + positional embedding 2: value embedding + temporal embedding 3: value embedding + positional embedding 4: value embedding')
parser.add_argument('--freq', type=str, default='h',
help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')

# Training Settings
parser.add_argument('--eval', type=int, default=0, help='evaluation')
@@ -163,6 +184,20 @@ def get_args():
parser.add_argument('--lradj', type=str, default='type1', help='learning rate adjustment type')
parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
parser.add_argument('--gradient_accumulation_steps', type=int, default=64, help='gradient accumulation steps')


# PatchTST
parser.add_argument('--fc_dropout', type=float, default=0.05, help='fully connected dropout')
parser.add_argument('--head_dropout', type=float, default=0.0, help='head dropout')
parser.add_argument('--patch_len', type=int, default=16, help='patch length')
parser.add_argument('--padding_patch', default='end', help='None: None; end: padding on the end')
parser.add_argument('--revin', type=int, default=1, help='RevIN; True 1 False 0')
parser.add_argument('--affine', type=int, default=0, help='RevIN-affine; True 1 False 0')
parser.add_argument('--subtract_last', type=int, default=0, help='0: subtract mean; 1: subtract last')
parser.add_argument('--decomposition', type=int, default=0, help='decomposition; True 1 False 0')
parser.add_argument('--kernel_size', type=int, default=25, help='decomposition-kernel')
parser.add_argument('--individual', type=int, default=0, help='individual head; True 1 False 0')

args, unknown = parser.parse_known_args()

return args
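
For orientation, a minimal sketch (not part of the diff) of how the new PatchTST arguments combine with seq_len; the helper name and the example numbers below are illustrative only:

# Hypothetical helper, not in this PR: approximates the patch count PatchTST derives
# from seq_len, patch_len, stride and padding_patch. This is why the pipeline change
# above resets seq_len to the full input length (prompt included) for PatchTST/DLinear.
def approx_patch_num(seq_len, patch_len=16, stride=8, padding_patch='end'):
    n = (seq_len - patch_len) // stride + 1
    if padding_patch == 'end':  # PatchTST pads one extra stride at the end of the series
        n += 1
    return n

# e.g. a 336-step input with the defaults above: (336 - 16) // 8 + 1 + 1 = 42 patches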
165 changes: 165 additions & 0 deletions ltsm/layers/Embed.py
@@ -0,0 +1,165 @@
# code from https://github.com/yuqinie98/PatchTST, with minor modifications
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import weight_norm
import math


class PositionalEmbedding(nn.Module):
def __init__(self, d_model, max_len=5000):
super(PositionalEmbedding, self).__init__()
# Compute the positional encodings once in log space.
pe = torch.zeros(max_len, d_model).float()
pe.requires_grad = False

position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)

pe = pe.unsqueeze(0)
self.register_buffer('pe', pe)

def forward(self, x):
return self.pe[:, :x.size(1)]
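
A minimal usage sketch (assumed shapes, not part of the diff): the module returns the cached sinusoidal table truncated to the input length, broadcastable over the batch dimension.

pe = PositionalEmbedding(d_model=512)
x = torch.randn(32, 96, 512)   # (batch, seq_len, d_model)
print(pe(x).shape)             # torch.Size([1, 96, 512])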


class TokenEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(TokenEmbedding, self).__init__()
padding = 1 if torch.__version__ >= '1.5.0' else 2
self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
kernel_size=3, padding=padding, padding_mode='circular', bias=False)
for m in self.modules():
if isinstance(m, nn.Conv1d):
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

def forward(self, x):
x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
return x
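
A minimal usage sketch (assumed shapes, not part of the diff): the circular Conv1d maps c_in input channels to d_model features at every time step, leaving the sequence length unchanged.

tok = TokenEmbedding(c_in=7, d_model=512)
x = torch.randn(32, 96, 7)     # (batch, seq_len, c_in)
print(tok(x).shape)            # torch.Size([32, 96, 512])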


class FixedEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(FixedEmbedding, self).__init__()

w = torch.zeros(c_in, d_model).float()
w.requires_grad = False

position = torch.arange(0, c_in).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

w[:, 0::2] = torch.sin(position * div_term)
w[:, 1::2] = torch.cos(position * div_term)

self.emb = nn.Embedding(c_in, d_model)
self.emb.weight = nn.Parameter(w, requires_grad=False)

def forward(self, x):
return self.emb(x).detach()


class TemporalEmbedding(nn.Module):
def __init__(self, d_model, embed_type='fixed', freq='h'):
super(TemporalEmbedding, self).__init__()

minute_size = 4
hour_size = 24
weekday_size = 7
day_size = 32
month_size = 13

Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
if freq == 't':
self.minute_embed = Embed(minute_size, d_model)
self.hour_embed = Embed(hour_size, d_model)
self.weekday_embed = Embed(weekday_size, d_model)
self.day_embed = Embed(day_size, d_model)
self.month_embed = Embed(month_size, d_model)

def forward(self, x):
x = x.long()

minute_x = self.minute_embed(x[:, :, 4]) if hasattr(self, 'minute_embed') else 0.
hour_x = self.hour_embed(x[:, :, 3])
weekday_x = self.weekday_embed(x[:, :, 2])
day_x = self.day_embed(x[:, :, 1])
month_x = self.month_embed(x[:, :, 0])

return hour_x + weekday_x + day_x + month_x + minute_x
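
A minimal usage sketch (assumed column order, read off the indexing in forward above; not part of the diff): x_mark carries integer-coded calendar features with columns [month, day, weekday, hour], plus a fifth minute-bucket column only when freq='t'.

temp = TemporalEmbedding(d_model=512, embed_type='fixed', freq='h')
x_mark = torch.randint(0, 7, (32, 96, 4))   # (batch, seq_len, calendar features)
print(temp(x_mark).shape)                   # torch.Size([32, 96, 512])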


class TimeFeatureEmbedding(nn.Module):
def __init__(self, d_model, embed_type='timeF', freq='h'):
super(TimeFeatureEmbedding, self).__init__()

freq_map = {'h': 4, 't': 5, 's': 6, 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
d_inp = freq_map[freq]
self.embed = nn.Linear(d_inp, d_model, bias=False)

def forward(self, x):
return self.embed(x)


class DataEmbedding(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding, self).__init__()

self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
d_model=d_model, embed_type=embed_type, freq=freq)
self.dropout = nn.Dropout(p=dropout)

def forward(self, x, x_mark):
x = self.value_embedding(x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
return self.dropout(x)
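
A minimal usage sketch (assumed shapes, not part of the diff): with embed_type='timeF' and freq='h' the temporal branch is a linear map over 4 real-valued time features, and the value, temporal and positional terms are summed before dropout.

emb = DataEmbedding(c_in=7, d_model=512, embed_type='timeF', freq='h', dropout=0.1)
x = torch.randn(32, 96, 7)       # raw series values
x_mark = torch.randn(32, 96, 4)  # time-feature covariates
print(emb(x, x_mark).shape)      # torch.Size([32, 96, 512])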


class DataEmbedding_wo_pos(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding_wo_pos, self).__init__()

self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
d_model=d_model, embed_type=embed_type, freq=freq)
self.dropout = nn.Dropout(p=dropout)

def forward(self, x, x_mark):
x = self.value_embedding(x) + self.temporal_embedding(x_mark)
return self.dropout(x)

class DataEmbedding_wo_pos_temp(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding_wo_pos_temp, self).__init__()

self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
d_model=d_model, embed_type=embed_type, freq=freq)
self.dropout = nn.Dropout(p=dropout)

def forward(self, x, x_mark):
x = self.value_embedding(x)
return self.dropout(x)

class DataEmbedding_wo_temp(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding_wo_temp, self).__init__()

self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
d_model=d_model, embed_type=embed_type, freq=freq)
self.dropout = nn.Dropout(p=dropout)

def forward(self, x, x_mark):
x = self.value_embedding(x) + self.position_embedding(x)
return self.dropout(x)
