# fingpt.py
import os
os.environ["PATH"] = f"{os.environ['PATH']}:/usr/local/cuda/bin"
os.environ['LD_LIBRARY_PATH'] = "/usr/local/cuda/lib64/"
# huggingface-cli download --resume-download THUDM/chatglm2-6b --local-dir /home/ouyangkun/LLM/chatglm2-6b
# 'libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'

from typing import List, Dict, Optional

import torch
from loguru import logger
from transformers import (
    AutoModel,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
)
from peft import (
    TaskType,
    LoraConfig,
    get_peft_model,
    set_peft_model_state_dict,
    prepare_model_for_kbit_training,  # replaces the deprecated prepare_model_for_int8_training
)
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING

training_args = TrainingArguments(
    output_dir='./finetuned_model',  # saved model path
    logging_steps=500,
    # max_steps=10000,
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    learning_rate=1e-4,
    weight_decay=0.01,
    warmup_steps=1000,
    save_steps=500,
    fp16=True,
    # bf16=True,
    torch_compile=False,
    load_best_model_at_end=True,
    evaluation_strategy="steps",
    remove_unused_columns=False,
)
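# With per_device_train_batch_size=4 and gradient_accumulation_steps=8, the
# effective batch size is 32 sequences per optimizer step (per GPU).
# Note: evaluation_strategy="steps" together with load_best_model_at_end=True
# requires an eval_dataset to be passed to the Trainer later on.
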
q_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
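# QLoRA-style loading: base weights are quantized to 4-bit NF4, double
# quantization compresses the quantization constants themselves, and matrix
# multiplications are carried out in float16 (bnb_4bit_compute_dtype).
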
# Load tokenizer & model
# The 6B checkpoint is large: downloading and loading it needs substantial disk and GPU memory.
model_name = "THUDM/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(
    model_name,
    quantization_config=q_config,
    trust_remote_code=True,
    device='cuda',  # forwarded to ChatGLM's custom model code (trust_remote_code), not a standard from_pretrained kwarg
)
# Prepare the quantized model for training: freeze base weights, upcast layer
# norms to fp32, and enable gradient checkpointing. The int8-specific variant
# (prepare_model_for_int8_training) is deprecated and removed in newer peft releases.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

# LoRA
# peft's default LoRA target modules for ChatGLM (the fused attention projection).
target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['chatglm']
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,             # rank of the LoRA update matrices
    lora_alpha=32,   # scaling factor (lora_alpha / r is applied to the update)
    lora_dropout=0.1,
    target_modules=target_modules,
    bias='none',
)
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

# Optionally resume from an earlier LoRA checkpoint (disabled by default).
resume_from_checkpoint = None
if resume_from_checkpoint is not None:
    # Prefer a full Trainer checkpoint; fall back to a bare adapter file.
    checkpoint_name = os.path.join(resume_from_checkpoint, 'pytorch_model.bin')
    if not os.path.exists(checkpoint_name):
        checkpoint_name = os.path.join(
            resume_from_checkpoint, 'adapter_model.bin'
        )
        resume_from_checkpoint = False  # only adapter weights are loaded; the Trainer starts fresh
    if os.path.exists(checkpoint_name):
        logger.info(f'Restarting from {checkpoint_name}')
        adapters_weights = torch.load(checkpoint_name)
        set_peft_model_state_dict(model, adapters_weights)
    else:
        logger.info(f'Checkpoint {checkpoint_name} not found')

model.print_trainable_parameters()
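
# ---------------------------------------------------------------------------
# The script stops after attaching the LoRA adapters; the imported Trainer and
# the training_args defined above are never used. Below is a minimal, hedged
# sketch of how they could be wired together. It assumes hypothetical
# `train_dataset` / `eval_dataset` objects (already tokenized into
# input_ids/labels) that are NOT defined in this file, which is why the sketch
# is left commented out.
# ---------------------------------------------------------------------------
# from transformers import DataCollatorForSeq2Seq
#
# data_collator = DataCollatorForSeq2Seq(
#     tokenizer,
#     padding=True,
#     return_tensors="pt",
# )
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=train_dataset,   # hypothetical tokenized dataset
#     eval_dataset=eval_dataset,     # needed because load_best_model_at_end=True
#     data_collator=data_collator,
# )
# trainer.train(resume_from_checkpoint=resume_from_checkpoint)
# model.save_pretrained(training_args.output_dir)  # saves only the LoRA adapter weights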