add custom trainer logging #30

Closed
31 changes: 27 additions & 4 deletions ltsm/data_pipeline/anormly_pipeline.py
@@ -23,7 +23,8 @@
 import logging
 from transformers import (
     Trainer,
-    TrainingArguments
+    TrainingArguments,
+    TrainerCallback,
 )
 
 logging.basicConfig(
@@ -51,6 +52,7 @@ def compute_loss(self, model, inputs, return_outputs=False):
         loss = nn.functional.cross_entropy(outputs, labels)
         #loss = nn.functional.cross_entropy(outputs.reshape(B*L,-1), inputs["labels"][:,1:].long().reshape(B*L))
         return (loss, outputs) if return_outputs else loss
+
     def compute_metrics(self, p):
         preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
         print(preds.shape, p.label_ids.shape)
@@ -67,6 +69,27 @@ def compute_metrics(self, p):
             "recall": recall_score(label_ids, preds_class, average="micro"),
             "f1": f1_score(label_ids, preds_class, average="micro")
         }
+
+
+class CustomTrainer(Trainer):
+    """
+    Trainer subclass that extends the Transformers Trainer.
+    It is used to add custom logging during training.
+    """
+    def training_step(self, model, inputs):
+        # Override to surface extra information during training:
+        # record whether label 1 appears in the current batch.
+        labels = inputs["labels"]
+        has_label_one = (labels == 1.).any().item() if labels is not None else False
+        self.current_label_check = has_label_one
+
+        return super().training_step(model, inputs)
+
+    def log(self, logs):
+        # Attach the custom field to every Trainer log record.
+        if hasattr(self, "current_label_check"):
+            logs["has_label_one"] = self.current_label_check
+        super().log(logs)
 
 class AnomalyTrainingPipeline():
     """
@@ -113,7 +136,7 @@ def run(self):
             fp16=False,
             save_steps=100,
             eval_steps=25,
-            logging_steps=5,
+            logging_steps=1,
             learning_rate=self.args.learning_rate,
             gradient_accumulation_steps=self.args.gradient_accumulation_steps,
             save_total_limit=10,
@@ -125,7 +148,7 @@
         train_dataset, eval_dataset, test_datasets, _ = get_datasets(self.args)
         train_dataset, eval_dataset= HF_Dataset(train_dataset), HF_Dataset(eval_dataset)
 
-        trainer = Trainer(
+        trainer = CustomTrainer(
             model=model,
             args=training_args,
             data_collator=self.model_manager.collate_fn,
@@ -139,7 +162,7 @@
         # Overload the trainer API
         if not self.args.eval:
             trainer.compute_loss = self.model_manager.compute_loss
-            trainer.prediction_step = self.model_manager.prediction_step
+            trainer.prediction_step = self.model_manager.prediction_step
             train_results = trainer.train()
             trainer.save_model()
             trainer.log_metrics("train", train_results.metrics)
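With these changes, every Trainer log record now carries a has_label_one flag telling whether label 1 appeared in the most recent training batch, and logging_steps is lowered from 5 to 1 so the flag is emitted on every step. TrainerCallback is added to the imports but is not used; the logging is implemented by subclassing Trainer directly. The sketch below shows how the new CustomTrainer could be exercised in isolation; the toy dataset, model, and output directory are illustrative placeholders and not part of this PR, and the sketch assumes the ltsm package is installed and a transformers release whose training_step(model, inputs) and log(logs) signatures match the overrides above.

```python
# Hedged usage sketch: only CustomTrainer comes from this PR; the toy
# dataset, model, and output_dir below are placeholders for illustration.
import torch
from torch import nn
from transformers import TrainingArguments

from ltsm.data_pipeline.anormly_pipeline import CustomTrainer

class ToyDataset(torch.utils.data.Dataset):
    """16 random samples; every other sample carries label 1."""
    def __len__(self):
        return 16
    def __getitem__(self, idx):
        return {"input_ids": torch.randn(8),
                "labels": torch.tensor(float(idx % 2))}

class ToyModel(nn.Module):
    """Tiny scorer that returns a dict with a loss, as Trainer expects."""
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 1)
    def forward(self, input_ids=None, labels=None):
        logits = self.linear(input_ids).squeeze(-1)
        return {"loss": nn.functional.mse_loss(logits, labels), "logits": logits}

args = TrainingArguments(
    output_dir="/tmp/custom_trainer_demo",   # placeholder path
    per_device_train_batch_size=4,
    num_train_epochs=1,
    logging_steps=1,       # log every step, as in the updated pipeline
    report_to="none",
)
trainer = CustomTrainer(model=ToyModel(), args=args, train_dataset=ToyDataset())
trainer.train()
# Each logged dict now also carries the custom field, e.g.
# {'loss': 0.87, 'has_label_one': True, ...}
```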
10 changes: 5 additions & 5 deletions tests/test_scripts/anomaly_config/config-1.json
@@ -9,14 +9,14 @@
     "prompt_data_path": "../../prompt_bank/stat-prompt/prompt_data_normalize_split",
     "data_processing": "standard_scaler",
     "learning_rate": 1e-4,
-    "batch_size": 100,
+    "batch_size": 8,
     "num_workers": 10,
-    "train_epochs": 4,
+    "train_epochs": 1,
     "train_ratio": 0.7,
     "val_ratio": 0.1,
     "do_anomaly": true,
-    "seq_len": 133,
-    "pred_len": 133,
+    "seq_len": 113,
+    "pred_len": 113,
     "prompt_len": 133,
     "lora": false,
     "lora_dim": 128,
@@ -36,7 +36,7 @@
     "tmax": 10,
     "eval": 0,
     "itr": 1,
-    "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_creditcard_113/",
+    "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_creditcard_113_check_bsize=8/",
     "downsample_rate": 20,
     "llm_layers": 32,
     "decay_fac": 0.75,
8 changes: 4 additions & 4 deletions tests/test_scripts/anomaly_config/config.json
@@ -9,7 +9,7 @@
     "prompt_data_path": "../../prompt_bank/stat-prompt/prompt_data_normalize_split",
     "data_processing": "standard_scaler",
     "learning_rate": 2e-5,
-    "batch_size": 100,
+    "batch_size": 8,
     "num_workers": 10,
     "train_epochs": 4,
     "train_ratio": 0.7,
@@ -20,7 +20,7 @@
     "prompt_len": 133,
     "lora": false,
     "lora_dim": 128,
-    "gpt_layers": 3,
+    "gpt_layers": 1,
     "d_model": 1024,
     "n_heads": 16,
     "d_ff": 512,
@@ -36,12 +36,12 @@
     "tmax": 10,
     "eval": 0,
     "itr": 1,
-    "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_113",
+    "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_113_check_bsize=8_grad_accumulate=16_layer=1",
     "downsample_rate": 20,
     "llm_layers": 32,
     "decay_fac": 0.75,
     "lradj": "type1",
     "patience": 3,
-    "gradient_accumulation_steps": 64
+    "gradient_accumulation_steps": 16
 }
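These two edits also change the effective batch size per optimizer update: assuming batch_size feeds the per-device train batch (the hunk that wires it into TrainingArguments is not shown here), the previous config accumulated 100 × 64 = 6400 samples per update, while this PR accumulates 8 × 16 = 128, ignoring any extra multiplication across the two GPUs selected in the launch script below.

```python
# Samples per optimizer update = batch_size * gradient_accumulation_steps
# (per device; data parallelism across GPUs would multiply this further).
old_effective = 100 * 64   # previous config.json
new_effective = 8 * 16     # this PR
print(old_effective, new_effective)   # 6400 128
```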

2 changes: 1 addition & 1 deletion tests/test_scripts/train_anomaly_main_ltsm.sh
@@ -1,4 +1,4 @@
-CONFIG_PATH="./anomaly_config/config-1.json"
+CONFIG_PATH="./anomaly_config/config.json"
 
 CUDA_VISIBLE_DEVICES=6,7 python3 anomaly_main_ltsm.py \
     --config_path ${CONFIG_PATH}