diff --git a/ltsm/data_pipeline/data_pipeline.py b/ltsm/data_pipeline/data_pipeline.py
index 2b021bd..ba3553f 100644
--- a/ltsm/data_pipeline/data_pipeline.py
+++ b/ltsm/data_pipeline/data_pipeline.py
@@ -1,11 +1,12 @@
 import numpy as np
 import torch
 import argparse
+import json
 import random
 import ipdb

 from ltsm.data_provider.data_factory import get_datasets
-from ltsm.data_provider.data_loader import HF_Dataset
+from ltsm.data_provider.data_loader import HF_Dataset, HF_Timestamp_Dataset
 from ltsm.data_pipeline.model_manager import ModelManager

 import logging
@@ -72,11 +73,10 @@ def run(self):
         )

         train_dataset, eval_dataset, test_datasets, _ = get_datasets(self.args)
-        train_dataset, eval_dataset= HF_Dataset(train_dataset), HF_Dataset(eval_dataset)
-
-        if self.args.model == 'PatchTST' or self.args.model == 'DLinear':
-            # Set the patch number to the size of the input sequence including the prompt sequence
-            self.model_manager.args.seq_len = train_dataset[0]["input_data"].size()[0]
+        if self.args.model == "Informer":
+            train_dataset, eval_dataset = HF_Timestamp_Dataset(train_dataset), HF_Timestamp_Dataset(eval_dataset)
+        else:
+            train_dataset, eval_dataset = HF_Dataset(train_dataset), HF_Dataset(eval_dataset)

         model = self.model_manager.create_model()

@@ -103,16 +103,23 @@ def run(self):
         # Testing settings
         for test_dataset in test_datasets:
+            if self.args.model == "Informer":
+                test_ds = HF_Timestamp_Dataset(test_dataset)
+            else:
+                test_ds = HF_Dataset(test_dataset)
+
             trainer.compute_loss = self.model_manager.compute_loss
             trainer.prediction_step = self.model_manager.prediction_step
-            test_dataset = HF_Dataset(test_dataset)
-            metrics = trainer.evaluate(test_dataset)
+            metrics = trainer.evaluate(test_ds)
             trainer.log_metrics("Test", metrics)
             trainer.save_metrics("Test", metrics)


 def get_args():
     parser = argparse.ArgumentParser(description='LTSM')
+
+    # Load JSON config file
+    parser.add_argument('--config', type=str, help='Path to JSON configuration file')

     # Basic Config
     parser.add_argument('--model_id', type=str, default='test_run', help='model id')
@@ -122,8 +129,9 @@ def get_args():
     parser.add_argument('--checkpoints', type=str, default='./checkpoints/')

     # Data Settings
+    parser.add_argument('--data', help='dataset type')
     parser.add_argument('--data_path', nargs='+', default='dataset/weather.csv', help='data files')
-    parser.add_argument('--test_data_path_list', nargs='+', required=True, help='test data file')
+    parser.add_argument('--test_data_path_list', nargs='+', help='test data file')
     parser.add_argument('--prompt_data_path', type=str, default='./weather.csv', help='prompt data file')
     parser.add_argument('--data_processing', type=str, default="standard_scaler", help='data processing method')
     parser.add_argument('--train_ratio', type=float, default=0.7, help='train data ratio')
@@ -153,7 +161,6 @@ def get_args():
     parser.add_argument('--model', type=str, default='model', help='model name, , options:[LTSM, LTSM_WordPrompt, LTSM_Tokenizer, DLinear, PatchTST, Informer]')
     parser.add_argument('--stride', type=int, default=8, help='stride')
     parser.add_argument('--tmax', type=int, default=10, help='tmax')
-    parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
     parser.add_argument('--embed', type=str, default='timeF', help='time features encoding, options:[timeF, fixed, learned]')
     parser.add_argument('--activation', type=str, default='gelu', help='activation')

@@ -200,6 +207,16 @@ def get_args():

     args, unknown = parser.parse_known_args()

+    if args.config:
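+        # NOTE: values from the JSON config are applied after parsing,
+        # so they override any matching command-line arguments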
+        with open(args.config, 'r') as f:
+            config = json.load(f)
+        json_args = argparse.Namespace(**config)
+
+        for key, value in vars(json_args).items():
+            setattr(args, key, value)
+
     return args
diff --git a/ltsm/data_pipeline/model_manager.py b/ltsm/data_pipeline/model_manager.py
index c73cec9..3d2e41f 100644
--- a/ltsm/data_pipeline/model_manager.py
+++ b/ltsm/data_pipeline/model_manager.py
@@ -126,7 +126,12 @@ def compute_loss(self, model, inputs, return_outputs=False):
         Returns:
             torch.Tensor or tuple: The computed loss, and optionally the outputs.
         """
-        outputs = model(inputs["input_data"])
+        if self.args.model == 'Informer':
+            input_data_mark = inputs["timestamp_input"].to(model.module.device)
+            label_mark = inputs["timestamp_labels"].to(model.module.device)
+            outputs = model(inputs["input_data"], input_data_mark, inputs["labels"], label_mark)
+        else:
+            outputs = model(inputs["input_data"])
         loss = nn.functional.mse_loss(outputs, inputs["labels"])
         return (loss, outputs) if return_outputs else loss

@@ -146,7 +151,12 @@ def prediction_step(self, model, inputs, prediction_loss_only=False, ignore_keys
         """
         input_data = inputs["input_data"].to(model.module.device)
         labels = inputs["labels"].to(model.module.device)
-        outputs = model(input_data)
+        if self.args.model == 'Informer':
+            input_data_mark = inputs["timestamp_input"].to(model.module.device)
+            label_mark = inputs["timestamp_labels"].to(model.module.device)
+            outputs = model(input_data, input_data_mark, labels, label_mark)
+        else:
+            outputs = model(input_data)
         loss = nn.functional.mse_loss(outputs, labels)
         return (loss, outputs, labels)

@@ -160,6 +170,14 @@ def collate_fn(self, batch):
         Returns:
             dict: Collated batch with 'input_data' and 'labels' tensors.
         """
+        if self.args.model == 'Informer':
+            return {
+                'input_data': torch.from_numpy(np.stack([x['input_data'] for x in batch])).type(torch.float32),
+                'labels': torch.from_numpy(np.stack([x['labels'] for x in batch])).type(torch.float32),
+                'timestamp_input': torch.from_numpy(np.stack([x['timestamp_input'] for x in batch])).type(torch.float32),
+                'timestamp_labels': torch.from_numpy(np.stack([x['timestamp_labels'] for x in batch])).type(torch.float32)
+            }
+
         return {
             'input_data': torch.from_numpy(np.stack([x['input_data'] for x in batch])).type(torch.float32),
             'labels': torch.from_numpy(np.stack([x['labels'] for x in batch])).type(torch.float32),
diff --git a/ltsm/data_provider/data_factory.py b/ltsm/data_provider/data_factory.py
index 35c7693..8002d10 100644
--- a/ltsm/data_provider/data_factory.py
+++ b/ltsm/data_provider/data_factory.py
@@ -7,6 +7,7 @@
 from ltsm.data_provider.data_splitter import SplitterByTimestamp
 from ltsm.data_provider.tokenizer import processor_dict
 from ltsm.data_provider.dataset import TSDataset, TSPromptDataset, TSTokenDataset
+from ltsm.data_provider.data_loader import Dataset_Custom, Dataset_ETT_hour, Dataset_ETT_minute

 from typing import Tuple, List, Union, Dict
 import logging
@@ -341,37 +342,63 @@ def getDatasets(self)->Tuple[TSDataset, TSDataset, List[TSDataset]]:

         return train_ds, val_ds, test_ds_list

 def get_datasets(args):
-    ds_factory = DatasetFactory(
-        data_paths=args.data_path,
-        prompt_data_path=args.prompt_data_path,
-        data_processing=args.data_processing,
-        seq_len=args.seq_len,
-        pred_len=args.pred_len,
-        train_ratio=args.train_ratio,
-        val_ratio=args.val_ratio,
-        model=args.model,
-        downsample_rate=args.downsample_rate,
-        do_anomaly=args.do_anomaly
-    )
-    train_ds, val_ds, test_ds_list= ds_factory.getDatasets()
-
-    return train_ds, val_ds, test_ds_list, ds_factory.processor
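+    # LTSM-family models go through the prompt-based DatasetFactory;
+    # the other baselines reuse the classic per-CSV dataset loaders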
+    if "LTSM" in args.model:
+        # Create datasets
+        dataset_factory = DatasetFactory(
+            data_paths=args.data_path,
+            prompt_data_path=args.prompt_data_path,
+            data_processing=args.data_processing,
+            seq_len=args.seq_len,
+            pred_len=args.pred_len,
+            train_ratio=args.train_ratio,
+            val_ratio=args.val_ratio,
+            model=args.model,
+            split_test_sets=False,
+            downsample_rate=args.downsample_rate,
+            do_anomaly=args.do_anomaly
+        )
+        train_dataset, val_dataset, test_datasets = dataset_factory.getDatasets()
+        processor = dataset_factory.processor
+    else:
+        timeenc = 0 if args.embed != 'timeF' else 1
+        Data = Dataset_Custom
+        if args.data == "ETTh1" or args.data == "ETTh2":
+            Data = Dataset_ETT_hour
+        elif args.data == "ETTm1" or args.data == "ETTm2":
+            Data = Dataset_ETT_minute
+
+        train_dataset = Data(
+            data_path=args.data_path[0],
+            split='train',
+            size=[args.seq_len, args.pred_len],
+            freq=args.freq,
+            timeenc=timeenc,
+            features=args.features
+        )
+        val_dataset = Data(
+            data_path=args.data_path[0],
+            split='val',
+            size=[args.seq_len, args.pred_len],
+            freq=args.freq,
+            timeenc=timeenc,
+            features=args.features
+        )
+        test_datasets = [Data(
+            data_path=args.data_path[0],
+            split='test',
+            size=[args.seq_len, args.pred_len],
+            freq=args.freq,
+            timeenc=timeenc,
+            features=args.features
+        )]
+        processor = train_dataset.scaler
+
+    return train_dataset, val_dataset, test_datasets, processor

 def get_data_loaders(args):
-    # Create datasets
-    dataset_factory = DatasetFactory(
-        data_paths=args.data_path,
-        prompt_data_path=args.prompt_data_path,
-        data_processing=args.data_processing,
-        seq_len=args.seq_len,
-        pred_len=args.pred_len,
-        train_ratio=args.train_ratio,
-        val_ratio=args.val_ratio,
-        model=args.model,
-        split_test_sets=False,
-        do_anomaly=args.do_anomaly
-    )
-    train_dataset, val_dataset, test_datasets = dataset_factory.getDatasets()
+    train_dataset, val_dataset, test_datasets, processor = get_datasets(args)

     print(f"Data loaded, train size {len(train_dataset)}, val size {len(val_dataset)}")

     train_loader = DataLoader(
@@ -396,4 +423,4 @@
         num_workers=0,
     )

-    return train_loader, val_loader, test_loader, dataset_factory.processor
\ No newline at end of file
+    return train_loader, val_loader, test_loader, processor
\ No newline at end of file
diff --git a/ltsm/data_provider/data_loader.py b/ltsm/data_provider/data_loader.py
index 2f52026..928f36f 100644
--- a/ltsm/data_provider/data_loader.py
+++ b/ltsm/data_provider/data_loader.py
@@ -31,14 +31,44 @@ def inverse_transform(self, data):
     def add_data(self, df):
         return self.dataset.add_data(df)

-    def __getitem__(self, index):
-
-        seq_x, seq_y = self.dataset.__getitem__(index)
+    def __getitem__(self, index):
+        outputs = self.dataset.__getitem__(index)
+        seq_x = outputs[0]
+        seq_y = outputs[1]

         return {
            "input_data": seq_x,
            "labels": seq_y
        }
+
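+# Informer's forward pass consumes time-feature marks alongside the raw series;
+# this wrapper surfaces them so the Trainer's collator can batch them.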
+class HF_Timestamp_Dataset(Dataset):
+    def __init__(self, dataset):
+        super().__init__()
+        self.dataset = dataset
+
+    def __read_data__(self):
+        return self.dataset.__read_data__()
+
+    def __len__(self):
+        return self.dataset.__len__()
+
+    def inverse_transform(self, data):
+        return self.dataset.inverse_transform(data)
+
+    def add_data(self, df):
+        return self.dataset.add_data(df)
+
+    def __getitem__(self, index):
+        seq_x, seq_y, seq_x_mark, seq_y_mark = self.dataset.__getitem__(index)
+
+        return {
+            "input_data": seq_x,
+            "labels": seq_y,
+            "timestamp_input": seq_x_mark,
+            "timestamp_labels": seq_y_mark
+        }

 class Dataset_ETT_hour(Dataset):
     def __init__(
@@ -131,8 +161,13 @@ def __getitem__(self, index):
         s_end = s_begin + self.seq_len
         r_begin = s_end
         r_end = r_begin + self.pred_len
-        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
-        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
+        if self.enc_in > 1:
+            seq_x = self.data_x[s_begin:s_end]
+            seq_y = self.data_y[r_begin:r_end]
+        else:
+            seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
+            seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
+
         seq_x_mark = self.data_stamp[s_begin:s_end]
         seq_y_mark = self.data_stamp[r_begin:r_end]

@@ -233,8 +268,13 @@ def __getitem__(self, index):
         s_end = s_begin + self.seq_len
         r_begin = s_end
         r_end = r_begin + self.pred_len
-        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
-        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
+        if self.enc_in > 1:
+            seq_x = self.data_x[s_begin:s_end]
+            seq_y = self.data_y[r_begin:r_end]
+        else:
+            seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
+            seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
+
         seq_x_mark = self.data_stamp[s_begin:s_end]
         seq_y_mark = self.data_stamp[r_begin:r_end]

@@ -345,8 +385,14 @@ def __getitem__(self, index):
         s_end = s_begin + self.seq_len
         r_begin = s_end
         r_end = r_begin + self.pred_len
-        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
-        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
+
+        if self.enc_in > 1:
+            seq_x = self.data_x[s_begin:s_end]
+            seq_y = self.data_y[r_begin:r_end]
+        else:
+            seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
+            seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
+
         seq_x_mark = self.data_stamp[s_begin:s_end]
         seq_y_mark = self.data_stamp[r_begin:r_end]
diff --git a/tests/model/DLinear_test.py b/tests/model/DLinear_test.py
index cbdf674..506fb27 100644
--- a/tests/model/DLinear_test.py
+++ b/tests/model/DLinear_test.py
@@ -20,8 +20,9 @@ def config(tmp_path):
         "test_data_path_list": [str(data_path)],
         "prompt_data_path": str(prompt_data_path),
         "enc_in": 1,
-        "seq_len": 336+133, # Equal to the sequence length + the length of prompt
-        "train_epochs": 1000,
+        "seq_len": 336,
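+        # the prompt sequence is no longer prepended for baseline models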
+        "train_epochs": 100,
         "patience": 10,
         "lradj": 'TST',
         "pct_start": 0.2,
diff --git a/tests/model/Informer_test.py b/tests/model/Informer_test.py
index 1b3395c..0b0aa19 100644
--- a/tests/model/Informer_test.py
+++ b/tests/model/Informer_test.py
@@ -28,7 +28,7 @@ def config(tmp_path):
         "dropout": 0.2,
         "fc_dropout": 0.2,
         "head_dropout": 0,
-        "seq_len": 336+133, # Equal to the sequence length + the length of prompt
+        "seq_len": 336,
         "patch_len": 16,
         "stride": 8,
         "des": 'Exp',
diff --git a/tests/model/PatchTST_test.py b/tests/model/PatchTST_test.py
index 010a194..1e58dd9 100644
--- a/tests/model/PatchTST_test.py
+++ b/tests/model/PatchTST_test.py
@@ -27,7 +27,7 @@ def config(tmp_path):
         "dropout": 0.2,
         "fc_dropout": 0.2,
         "head_dropout": 0,
-        "seq_len": 336+133, # Equal to the sequence length + the length of prompt
+        "seq_len": 336,
         "patch_len": 16,
         "stride": 8,
         "des": 'Exp',
diff --git a/tests/test_scripts/dlinear.json b/tests/test_scripts/dlinear.json
new file mode 100644
index 0000000..8fd7111
--- /dev/null
+++ b/tests/test_scripts/dlinear.json
@@ -0,0 +1,16 @@
+{
+    "model": "DLinear",
+    "model_name_or_path": "gpt2-medium",
+    "pred_len": 96,
+    "gradient_accumulation_steps": 64,
+    "seq_len": 336,
+    "des": "Exp",
+    "train_epochs": 100,
+    "freeze": 0,
+    "itr": 1,
+    "learning_rate": 1e-3,
+    "downsample_rate": 20,
+    "output_dir": "output/dlinear/",
+    "eval": 0,
+    "features": "M"
+}
\ No newline at end of file
diff --git a/tests/test_scripts/informer.json b/tests/test_scripts/informer.json
new file mode 100644
index 0000000..b022f65
--- /dev/null
+++ b/tests/test_scripts/informer.json
@@ -0,0 +1,41 @@
+{
+    "model": "Informer",
+    "model_name_or_path": "gpt2-medium",
+    "pred_len": 96,
+    "gradient_accumulation_steps": 64,
+    "e_layers": 3,
+    "d_layers": 1,
+    "n_heads": 16,
+    "d_model": 128,
+    "d_ff": 256,
+    "dropout": 0.2,
+    "fc_dropout": 0.2,
+    "head_dropout": 0,
+    "seq_len": 336,
+    "patch_len": 16,
+    "stride": 8,
+    "des": "Exp",
+    "train_epochs": 100,
+    "patience": 10,
+    "lradj": "TST",
+    "pct_start": 0.2,
+    "freeze": 0,
+    "itr": 1,
+    "learning_rate": 1e-3,
+    "downsample_rate": 20,
+    "output_dir": "output/informer/",
+    "eval": 0,
+    "padding_patch": "end",
+    "affine": 0,
+    "subtract_last": 0,
+    "decomposition": 0,
+    "kernel_size": 25,
+    "individual": 0,
+    "output_attention": 0,
+    "embed": "timeF",
+    "factor": 1,
+    "distil": 1,
+    "embed_type": 0,
+    "activation": "gelu",
+    "features": "M"
+}
\ No newline at end of file
diff --git a/tests/test_scripts/patchtst.json b/tests/test_scripts/patchtst.json
new file mode 100644
index 0000000..02b7a4d
--- /dev/null
+++ b/tests/test_scripts/patchtst.json
@@ -0,0 +1,28 @@
+{
+    "model": "PatchTST",
+    "model_name_or_path": "gpt2-medium",
+    "pred_len": 96,
+    "gradient_accumulation_steps": 64,
+    "e_layers": 3,
+    "n_heads": 16,
+    "d_model": 128,
+    "d_ff": 256,
+    "dropout": 0.2,
+    "fc_dropout": 0.2,
+    "head_dropout": 0,
+    "seq_len": 336,
+    "patch_len": 16,
+    "stride": 8,
+    "des": "Exp",
+    "train_epochs": 100,
+    "patience": 10,
+    "lradj": "TST",
+    "pct_start": 0.2,
+    "freeze": 0,
+    "itr": 1,
+    "learning_rate": 1e-3,
+    "downsample_rate": 20,
+    "output_dir": "output/patchtst/",
+    "eval": 0,
+    "features": "M"
+}
\ No newline at end of file
diff --git a/tests/test_scripts/train_dlinear_csv.sh b/tests/test_scripts/train_dlinear_csv.sh
new file mode 100755
index 0000000..16d6698
--- /dev/null
+++ b/tests/test_scripts/train_dlinear_csv.sh
@@ -0,0 +1,39 @@
+nohup bash -c '
+declare -a data_paths=(
+    "../../datasets/ETT-small/ETTh1.csv"
+    "../../datasets/ETT-small/ETTh2.csv"
+    "../../datasets/ETT-small/ETTm1.csv"
+    "../../datasets/ETT-small/ETTm2.csv"
+    "../../datasets/electricity/electricity.csv"
+    "../../datasets/traffic/traffic.csv"
+    "../../datasets/exchange_rate/exchange_rate.csv"
+    "../../datasets/weather/weather.csv"
+)
+
+declare -a data=(
+    "ETTh1"
+    "ETTh2"
+    "ETTm1"
+    "ETTm2"
+    "custom"
+    "custom"
+    "custom"
+    "custom"
+)
+
+declare -a features=(7 7 7 7 321 862 8 21)
+
+declare -a batch_sizes=(128 128 128 128 32 24 128 128)
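+# features (enc_in) and batch_sizes are index-aligned with data_paths above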
+
+for index in "${!data_paths[@]}";
+do
+    CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
+    --config "dlinear.json" \
+    --data_path ${data_paths[$index]} \
+    --data ${data[$index]} \
+    --enc_in ${features[$index]} \
+    --batch_size ${batch_sizes[$index]}
+done
+' > output.log 2>&1 &
+echo $! > save_pid.txt
\ No newline at end of file
diff --git a/tests/test_scripts/train_informer_csv.sh b/tests/test_scripts/train_informer_csv.sh
new file mode 100755
index 0000000..a75d06c
--- /dev/null
+++ b/tests/test_scripts/train_informer_csv.sh
@@ -0,0 +1,40 @@
+nohup bash -c '
+declare -a data_paths=(
+    "../../datasets/ETT-small/ETTh1.csv"
+    "../../datasets/ETT-small/ETTh2.csv"
+    "../../datasets/ETT-small/ETTm1.csv"
+    "../../datasets/ETT-small/ETTm2.csv"
+    "../../datasets/electricity/electricity.csv"
+    "../../datasets/traffic/traffic.csv"
+    "../../datasets/exchange_rate/exchange_rate.csv"
+    "../../datasets/weather/weather.csv"
+)
+
+declare -a data=(
+    "ETTh1"
+    "ETTh2"
+    "ETTm1"
+    "ETTm2"
+    "custom"
+    "custom"
+    "custom"
+    "custom"
+)
+
+declare -a features=(7 7 7 7 321 862 8 21)
+
+declare -a batch_sizes=(128 128 128 128 32 24 128 128)
+
+for index in "${!data_paths[@]}";
+do
+    CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
+    --config "informer.json" \
+    --data_path ${data_paths[$index]} \
+    --data ${data[$index]} \
+    --enc_in ${features[$index]} \
+    --dec_in ${features[$index]} \
+    --c_out ${features[$index]} \
+    --batch_size ${batch_sizes[$index]}
+done
+' > output.log 2>&1 &
+echo $! > save_pid.txt
\ No newline at end of file
diff --git a/tests/test_scripts/train_patchtst_csv.sh b/tests/test_scripts/train_patchtst_csv.sh
index 38ecf35..74a8e78 100755
--- a/tests/test_scripts/train_patchtst_csv.sh
+++ b/tests/test_scripts/train_patchtst_csv.sh
@@ -1,44 +1,39 @@
-TRAIN="../../datasets/electricity/electricity.csv"
-TEST="../../datasets/electricity/electricity.csv"
-PROMPT="../../prompt_bank/prompt_data_normalize_split"
+nohup bash -c '
+declare -a data_paths=(
+    "../../datasets/ETT-small/ETTh1.csv"
+    "../../datasets/ETT-small/ETTh2.csv"
+    "../../datasets/ETT-small/ETTm1.csv"
+    "../../datasets/ETT-small/ETTm2.csv"
+    "../../datasets/electricity/electricity.csv"
+    "../../datasets/traffic/traffic.csv"
+    "../../datasets/exchange_rate/exchange_rate.csv"
+    "../../datasets/weather/weather.csv"
+)

-epoch=1000
-downsample_rate=20
-freeze=0
-lr=1e-3
+declare -a data=(
+    "ETTh1"
+    "ETTh2"
+    "ETTm1"
+    "ETTm2"
+    "custom"
+    "custom"
+    "custom"
+    "custom"
+)

-OUTPUT_PATH="output/patchtst_lr${lr}_loraFalse_down${downsample_rate}_freeze${freeze}_e${epoch}_pred${pred_len}/"
-echo "Current OUTPUT_PATH: ${OUTPUT_PATH}"
+declare -a features=(7 7 7 7 321 862 8 21)

-for pred_len in 96 192 336 720
+declare -a batch_sizes=(128 128 128 128 32 24 128 128)
+
+for index in "${!data_paths[@]}";
 do
-    CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
-    --data_path ${TRAIN} \
-    --model PatchTST \
-    --model_name_or_path gpt2-medium \
-    --pred_len ${pred_len} \
-    --gradient_accumulation_steps 64 \
-    --test_data_path_list ${TEST} \
-    --prompt_data_path ${PROMPT} \
-    --enc_in 1 \
-    --e_layers 3 \
-    --n_heads 16 \
-    --d_model 128 \
-    --d_ff 256 \
-    --dropout 0.2\
-    --fc_dropout 0.2\
-    --head_dropout 0\
-    --seq_len 336\
-    --patch_len 16\
-    --stride 8\
-    --des 'Exp' \
-    --train_epochs ${epoch}\
-    --patience 10\
-    --lradj 'TST'\
-    --pct_start 0.2\
-    --freeze ${freeze} \
-    --itr 1 --batch_size 32 --learning_rate ${lr}\
-    --downsample_rate ${downsample_rate} \
-    --output_dir ${OUTPUT_PATH}\
-    --eval 0
-done
\ No newline at end of file
+    CUDA_VISIBLE_DEVICES=0,1,2,3 python3 main_ltsm.py \
+    --config "patchtst.json" \
+    --data_path ${data_paths[$index]} \
+    --data ${data[$index]} \
+    --enc_in ${features[$index]} \
+    --batch_size ${batch_sizes[$index]}
+done
+' > output.log 2>&1 &
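+# record the PID of the detached run so it can be monitored or stopped later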
+echo $! > save_pid.txt
\ No newline at end of file