diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..69ab8f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +data +tensor_files +CIFAR10 +models +plt_storage +output +__pycache__ +.neptune +.vscode +kl_maps +ffn_aug_maps +ffn_kl_maps +*.pth +mode_pths +*.png +*.pt +hessian_value_pts +*.txt +.ipynb_checkpoints +notebook_results +hessian_value_pts +BERT_tiny_4l +lls_logs +layer_hessian_results diff --git a/README.md b/README.md new file mode 100644 index 0000000..0d14a55 --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +# Self-Attention Recovery for QAT (SARQ) Implementation +This repository contains the SARQ code for **Self-Attention Map is All You Need for QAT of Finetuned Transformers** + +스크린샷 2022-02-07 오후 2 03 19 + +## Environments +``` +pip install -r requirements.txt +``` + +## Model +You can obtain GLUE task-specific fine-tuned BERT-base models using the Hugging Face example code. +https://github.com/huggingface/transformers/tree/master/examples/pytorch/text-classification + + +## GLUE Dataset +### Data +Download GLUE. +https://github.com/nyu-mll/GLUE-baselines + +## Self-Attention Recovery for QAT (SARQ) +The proposed SARQ method consists of two steps (see the figure for two-step SARQ). + +1. Teacher Intervention is employed to fine-tune the quantized weights of attention propagation (PROP). +2. Quantization is applied to all weights of the Transformer layers for QAT. + +You can run SARQ two-step training with the provided bash scripts. +``` +# For SARQ Two Step Training (w/o DA) +bash run_SARQ_two_step.sh {GPU Num} {GLUE Task} # bash run_SARQ_two_step.sh 0 sts-b + +# For SARQ 1 Step Training (w/o DA) +bash run_SARQ_1step.sh {GPU Num} {GLUE Task} {DA option} {DA N param} # bash run_SARQ_1step.sh 0 sts-b 0 0 + +# For TernaryBERT Training for comparison +bash run_glue.sh {GPU Num} {GLUE Task} # bash run_glue.sh 0 sts-b + +``` + +For the Data Augmentation (DA) option, use the TinyBERT data augmentation code to generate the expanded GLUE dataset. 
+ +https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/TinyBERT + + +## Arguments +To be Updated. + + diff --git a/main.py b/main.py new file mode 100644 index 0000000..cf4983c --- /dev/null +++ b/main.py @@ -0,0 +1,1008 @@ + +# This code is implemented base on "TernaryBERT: Distillation-aware Ultra-low Bit BERT" (Zhang et al, EMNLP2020) +# https://arxiv.org/abs/2009.12812 + +from __future__ import absolute_import, division, print_function + +import argparse +import logging +import os +import random +import sys +import pickle +import copy +import collections +import math + +import numpy as np +import numpy +import torch +from torch.utils.data import DataLoader, RandomSampler, SequentialSampler,TensorDataset + +from torch.nn import CrossEntropyLoss, MSELoss, CosineEmbeddingLoss + +from transformer import BertForSequenceClassification,WEIGHTS_NAME, CONFIG_NAME +from transformer.modeling_quant import BertForSequenceClassification as QuantBertForSequenceClassification +from transformer import BertTokenizer +from transformer import BertAdam +from transformer import BertConfig +from utils_glue import * + +import numpy as np + +import torch.nn.functional as F +import time + +log_format = '%(asctime)s %(message)s' +logging.basicConfig(stream=sys.stdout, level=logging.INFO, + format=log_format, datefmt='%m/%d %I:%M:%S %p') +logger = logging.getLogger() + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def 
get_tensor_data(output_mode, features): + if output_mode == "classification": + all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long) + elif output_mode == "regression": + all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float) + + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long) + all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long) + tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,all_label_ids, all_seq_lengths) + return tensor_data, all_label_ids + +def do_eval(model, task_name, eval_dataloader, + device, output_mode, eval_labels, num_labels, teacher_model=None): + eval_loss = 0 + nb_eval_steps = 0 + preds = [] + + for batch_ in eval_dataloader: + batch_ = tuple(t.to(device) for t in batch_) + + with torch.no_grad(): + input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch_ + + if teacher_model is not None: + teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids, segment_ids, input_mask) + logits, student_atts, student_reps, student_probs, student_values = model(input_ids, segment_ids, input_mask, teacher_outputs=(teacher_probs, teacher_values, teacher_reps, teacher_logits, teacher_atts)) + else: + logits, _, _, _, _ = model(input_ids, segment_ids, input_mask) + + # create eval loss and other metric required by the task + if output_mode == "classification": + loss_fct = CrossEntropyLoss() + tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1)) + elif output_mode == "regression": + loss_fct = MSELoss() + tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1)) + + eval_loss += tmp_eval_loss.mean().item() + nb_eval_steps += 1 + if len(preds) == 0: + 
preds.append(logits.detach().cpu().numpy()) + else: + preds[0] = np.append( + preds[0], logits.detach().cpu().numpy(), axis=0) + + eval_loss = eval_loss / nb_eval_steps + + preds = preds[0] + if output_mode == "classification": + preds = np.argmax(preds, axis=1) + elif output_mode == "regression": + preds = np.squeeze(preds) + result = compute_metrics(task_name, preds, eval_labels.numpy()) + result['eval_loss'] = eval_loss + return result + +def soft_cross_entropy(predicts, targets): + student_likelihood = torch.nn.functional.log_softmax(predicts, dim=-1) + targets_prob = torch.nn.functional.softmax(targets, dim=-1) + return torch.sum((- targets_prob * student_likelihood), dim=-1).mean() + +def main(): + + # ================================================================================ # + # ArgParse + # ================================================================================ # + parser = argparse.ArgumentParser() + parser.add_argument("--data_dir", + default='data', + type=str, + help="The input data dir. 
Should contain the .tsv files (or other data files) for the task.") + parser.add_argument("--model_dir", + default='models', + type=str, + help="The model dir.") + parser.add_argument("--teacher_model", + default=None, + type=str, + help="The models directory.") + parser.add_argument("--student_model", + default=None, + type=str, + help="The models directory.") + parser.add_argument("--task_name", + default='sst-2', + type=str, + help="The name of the task to train.") + parser.add_argument("--output_dir", + default='output', + type=str, + help="The output directory where the model predictions and checkpoints will be written.") + + parser.add_argument("--learning_rate", + default=2e-5, + type=float, + help="The initial learning rate for Adam.") + parser.add_argument("--num_train_epochs", + default=3.0, + type=float, + help="Total number of training epochs to perform.") + parser.add_argument('--seed', + type=int, + default=42, + help="random seed for initialization") + + parser.add_argument('--save_fp_model', + action='store_true', + help="Whether to save fp32 model") + + parser.add_argument('--save_quantized_model', + default=False, type=str2bool, + help="Whether to save quantized model") + + parser.add_argument("--input_bits", + default=8, + type=int, + help="Quantization bits for activation.") + + parser.add_argument("--tc_top_k", + default=3, + type=int, + help="Top-K Coverage") + + parser.add_argument("--gpus", + default=1, + type=int, + help="Number of GPUs to use") + parser.add_argument("--clip_val", + default=2.5, + type=float, + help="Initial clip value.") + + parser.add_argument('--qk_FP', + default=False, type=str2bool, + ) + + parser.add_argument('--qkv_FP', + default=False, type=str2bool, + ) + + parser.add_argument('--neptune', + default=True, type=str2bool, + help="neptune logging option") + + #MSKIM Quantization Range Option + parser.add_argument('--quantize', + default =True, type=str2bool, + help="Whether to quantize student model") + + 
parser.add_argument('--ffn_1', + default =True, type=str2bool, + help="Whether to quantize Feed Forward Network") + + parser.add_argument('--ffn_2', + default =True, type=str2bool, + help="Whether to quantize Feed Forward Network") + + parser.add_argument('--qkv', + default =True, type=str2bool, + help="Whether to quantize Query, Key, Value Mapping Weight Matrix") + + parser.add_argument('--emb', + default =True, type=str2bool, + help="Whether to quantize Embedding Layer") + + parser.add_argument('--cls', + default =True, type=str2bool, + help="Whether to quantize Classifier Dense Layer") + + parser.add_argument('--aug_train', + default =False, type=str2bool, + help="Whether to use augmented data or not") + + parser.add_argument('--clipping', + default =False, type=str2bool, + help="Whether to use FP Weight Clipping") + + + parser.add_argument("--mean_scale", + default=0.7, + type=float, + help="Ternary Clipping Value Scale Value") + + parser.add_argument("--exp_name", + default="", + type=str, + help="Output Directory Name") + + parser.add_argument("--training_type", + default="qat_normal", + type=str, + help="QAT Method") + + parser.add_argument("--aug_N", + default=30, + type=int, + help="Data Augmentation N Number") + + parser.add_argument('--pred_distill', + default =False, type=str2bool, + help="prediction distill option") + + parser.add_argument('--attn_distill', + default =True, type=str2bool, + help="attention Score Distill Option") + + parser.add_argument('--rep_distill', + default =True, type=str2bool, + help="Transformer Layer output Distill Option") + + parser.add_argument('--output_distill', + default =False, type=str2bool, + help="Context Value Distill Option") + + parser.add_argument('--gt_loss', + default =False, type=str2bool, + help="Ground Truth Option") + + # Teacher Intervention Options + parser.add_argument('--teacher_attnmap', + default =False, type=str2bool, + help="Teacher Intervention Option (TI-M)") + 
parser.add_argument('--teacher_output', + default =False, type=str2bool, + help="Teacher Intervention Option (TI-O)") + parser.add_argument('--teacher_gradual', + default =False, type=str2bool, + help="Teacher Intervention Option (TI-G)") + + parser.add_argument('--teacher_stochastic', + default =False, type=str2bool, + help="Teacher Intervention Option (Stochastic Mixed)") + + parser.add_argument('--teacher_inverted', + default =False, type=str2bool, + help="Teacher Intervention Option (Stochastic Mixed)") + + parser.add_argument('--teacher_context', + default =False, type=str2bool, + help="Teacher Intervention Option (Context)") + + parser.add_argument('--step1_option', + default ="GRAD", type=str, + help="Teacher Intervention Step-1 Option (For step-2 model init)") + + parser.add_argument('--bert', + default ="base", type=str, + ) + + args = parser.parse_args() + + # ================================================================================ # + # Logging setup + # ================================================================================ # + run = None + + # Use Neptune for logging + if args.neptune: + import neptune.new as neptune + run = neptune.init_run(project='niceball0827/' + args.task_name.upper(), + api_token='eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLC\ + JhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI0YjM\ + 0ZTYwMi1kNjQwLTQ4NGYtOTYxMy03Mjc5ZmVkMzY2YTgifQ==') + + # run = neptune.init(project='Neptune_ID/ProjectName', + # api_token='Neptune_API_Token') + + # ================================================================================ # + # Load Directory + # ================================================================================ # + + # Exp Name + exp_name = args.exp_name + + exp_name += f"_{args.bert}" + + if args.training_type == "qat_step1": + if args.teacher_attnmap: + exp_name += f"_MI" + if args.teacher_context: + exp_name += f"_CI" + if args.teacher_output: + exp_name += f"_OI" + if 
args.teacher_gradual: + exp_name += f"_GRAD" + if args.teacher_inverted: + exp_name += f"_INVERTED" + if args.teacher_stochastic: + exp_name += f"_STOCHASTIC" + + else: + if args.gt_loss: + exp_name += "_G" + if args.attn_distill: + exp_name += "_S" + if args.rep_distill: + exp_name += "_R" + if args.output_distill: + exp_name += "_O" + exp_name += f"_{args.seed}" + + + if args.training_type == "qat_step2": + exp_name += f"_{args.step1_option}" + + args.exp_name = exp_name + + if args.aug_train: + logger.info(f'DA QAT') + + logger.info(f'EXP SET: {exp_name}') + logger.info(f'TASK: {args.task_name}') + logger.info(f"SIZE: {args.bert}") + logger.info(f"SEED: {args.seed}") + logger.info(f'EPOCH: {args.num_train_epochs}') + + # GLUE Dataset Setting + task_name = args.task_name.lower() + data_dir = os.path.join(args.data_dir,task_name) + processed_data_dir = os.path.join(data_dir,'preprocessed') + if not os.path.exists(processed_data_dir): + os.mkdir(processed_data_dir) + + # BERT Large Option + if args.bert == "large": + args.model_dir = os.path.join(args.model_dir, "BERT_large") + args.output_dir = os.path.join(args.output_dir, "BERT_large") + + if args.bert == "tiny-4l": + args.model_dir = os.path.join(args.model_dir, "BERT_Tiny_4l") + args.output_dir = os.path.join(args.output_dir, "BERT_Tiny_4l") + + if args.bert == "tiny-6l": + args.model_dir = os.path.join(args.model_dir, "BERT_Tiny_6l") + args.output_dir = os.path.join(args.output_dir, "BERT_Tiny_6l") + + # Model Save Directory + output_dir = os.path.join(args.output_dir,task_name) + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + if args.save_quantized_model: + output_quant_dir = os.path.join(output_dir, 'exploration') + if not os.path.exists(output_quant_dir): + os.mkdir(output_quant_dir) + + if not os.path.exists(output_quant_dir): + os.makedirs(output_quant_dir) + + output_quant_dir = os.path.join(output_quant_dir, args.exp_name) + if not os.path.exists(output_quant_dir): + 
os.makedirs(output_quant_dir) + + # ================================================================================ # + # Load Pths + # ================================================================================ # + # Student Model Pretrained FIle + + if args.training_type == "qat_normal": + args.student_model = os.path.join(args.model_dir,task_name) + elif args.training_type == "qat_step1": + args.student_model = os.path.join(args.model_dir, task_name) + elif args.training_type == "qat_step2": + args.student_model = os.path.join(args.output_dir, task_name, "exploration", f"TI_step1_{args.bert}_{args.step1_option}") + else: + raise ValueError("Choose Training Type {downsteam, qat_normal, qat_step1, qat_step2, qat_step3, gradual}") + + # Teacher Model Pretrained FIle + args.teacher_model = os.path.join(args.model_dir,task_name) + + processors = { + "cola": ColaProcessor, + "mnli": MnliProcessor, + "mnli-mm": MnliMismatchedProcessor, + "mrpc": MrpcProcessor, + "sst-2": Sst2Processor, + "sts-b": StsbProcessor, + "qqp": QqpProcessor, + "qnli": QnliProcessor, + "rte": RteProcessor + } + + output_modes = { + "cola": "classification", + "mnli": "classification", + "mrpc": "classification", + "sst-2": "classification", + "sts-b": "regression", + "qqp": "classification", + "qnli": "classification", + "rte": "classification" + } + + default_params = { + "cola": {"max_seq_length": 64,"batch_size":16,"eval_step": 2000 if args.aug_train else 50}, # No Aug : 50 Aug : 400 + "mnli": {"max_seq_length": 128,"batch_size":32,"eval_step":8000}, + "mrpc": {"max_seq_length": 128,"batch_size":32,"eval_step":1000 if args.aug_train else 50}, + "sst-2": {"max_seq_length": 64,"batch_size":32,"eval_step":100}, + "sts-b": {"max_seq_length": 128,"batch_size":32,"eval_step":2000 if args.aug_train else 100}, + "qqp": {"max_seq_length": 128,"batch_size":32,"eval_step":1000}, + "qnli": {"max_seq_length": 128,"batch_size":32,"eval_step":1000}, + "rte": {"max_seq_length": 
128,"batch_size":32,"eval_step":1000 if args.aug_train else 20} + } + + acc_tasks = ["mnli", "mrpc", "sst-2", "qqp", "qnli", "rte"] + corr_tasks = ["sts-b"] + mcc_tasks = ["cola"] + + # ================================================================================ # + # prepare devices + # ================================================================================ # + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + n_gpu = args.gpus + + # ================================================================================ # + # prepare seed + # ================================================================================ # + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + + if n_gpu > 0: + torch.cuda.manual_seed_all(args.seed) + + if task_name in default_params: + args.batch_size = default_params[task_name]["batch_size"] + if n_gpu > 0: + args.batch_size = int(args.batch_size*n_gpu) + args.max_seq_length = default_params[task_name]["max_seq_length"] + args.eval_step = default_params[task_name]["eval_step"] + + processor = processors[task_name]() + output_mode = output_modes[task_name] + label_list = processor.get_labels() + num_labels = len(label_list) + + # ================================================================================ # + # Load Vocab FIle -> Tokenization + # ================================================================================ # + tokenizer = BertTokenizer.from_pretrained(args.teacher_model, do_lower_case=True) + + # ================================================================================ # + # Dataset Setup (with DA) + # ================================================================================ # + if args.aug_train: # Data Augmentation + try: + train_file = os.path.join(processed_data_dir,f'aug_data_{args.aug_N}.pkl') + train_features = pickle.load(open(train_file,'rb')) + except: + train_examples = processor.get_aug_examples(data_dir, args.aug_N) + 
train_features = convert_examples_to_features(train_examples, label_list, + args.max_seq_length, tokenizer, output_mode) + with open(train_file, 'wb') as f: + pickle.dump(train_features, f, protocol=pickle.HIGHEST_PROTOCOL) + else: + try: + train_file = os.path.join(processed_data_dir,'data.pkl') + train_features = pickle.load(open(train_file,'rb')) + + except: + train_examples = processor.get_train_examples(data_dir) + train_features = convert_examples_to_features(train_examples, label_list, + args.max_seq_length, tokenizer, output_mode) + + with open(train_file, 'wb') as f: + pickle.dump(train_features, f, protocol=pickle.HIGHEST_PROTOCOL) + + num_train_epochs = args.num_train_epochs + num_train_optimization_steps = math.ceil(len(train_features) / args.batch_size) * num_train_epochs + + # TI Step-2 Iteration Number Setting + if "tiny-4l" in args.bert or task_name == "cola": + ti_step_1_total_step = 120 + else: + ti_step_1_total_step = 60 + + if args.training_type == "qat_step1": + args.eval_step = 10 + num_train_optimization_steps = ti_step_1_total_step + + # We keep total two-step QAT iteration number identical to baseline TernaryBERT QAT setting + # Total Iteration Step = N + # TI step-1 iteration step = s + # TI step-2 iteration step = N - s + + if args.training_type == "qat_step2" : + num_train_optimization_steps = num_train_optimization_steps - ti_step_1_total_step + + train_data, _ = get_tensor_data(output_mode, train_features) + train_sampler = RandomSampler(train_data) + train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.batch_size) + + # Dev Data load + try: + dev_file = train_file = os.path.join(processed_data_dir,'dev.pkl') + eval_features = pickle.load(open(dev_file,'rb')) + except: + eval_examples = processor.get_dev_examples(data_dir) + eval_features = convert_examples_to_features(eval_examples, label_list, args.max_seq_length, tokenizer, output_mode) + with open(dev_file, 'wb') as f: + pickle.dump(eval_features, f, 
protocol=pickle.HIGHEST_PROTOCOL) + + eval_data, eval_labels = get_tensor_data(output_mode, eval_features) + eval_sampler = SequentialSampler(eval_data) + eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.batch_size) + + if task_name == "mnli": + processor = processors["mnli-mm"]() + try: + dev_mm_file = train_file = os.path.join(processed_data_dir,'dev-mm_data.pkl') + mm_eval_features = pickle.load(open(dev_mm_file,'rb')) + except: + mm_eval_examples = processor.get_dev_examples(data_dir) + mm_eval_features = convert_examples_to_features( + mm_eval_examples, label_list, args.max_seq_length, tokenizer, output_mode) + with open(dev_mm_file, 'wb') as f: + pickle.dump(mm_eval_features, f, protocol=pickle.HIGHEST_PROTOCOL) + + mm_eval_data, mm_eval_labels = get_tensor_data(output_mode, mm_eval_features) + # logger.info(" Num examples = %d", len(mm_eval_features)) + + mm_eval_sampler = SequentialSampler(mm_eval_data) + mm_eval_dataloader = DataLoader(mm_eval_data, sampler=mm_eval_sampler, + batch_size=args.batch_size) + + + # ================================================================================ # + # Build Teacher Model + # ================================================================================ # + teacher_model = BertForSequenceClassification.from_pretrained(args.teacher_model, num_labels=num_labels) + + teacher_model.to(device) + teacher_model.eval() + + if n_gpu > 1: + teacher_model = torch.nn.DataParallel(teacher_model) + + result = do_eval(teacher_model, task_name, eval_dataloader, + device, output_mode, eval_labels, num_labels) + + # ================================================================================ # + # Save Teacher Model Peroformance for KD Training + # ================================================================================ # + if task_name in acc_tasks: + if task_name in ['sst-2','mnli','qnli','rte']: + fp32_performance = f"acc:{result['acc']}" + fp32_score = result['acc'] + elif task_name 
in ['mrpc','qqp']: + fp32_performance = f"f1/acc:{result['f1']}/{result['acc']} avg : {(result['f1'] + result['acc'])*50}" + fp32_score = (result['f1'] + result['acc'])*50 + if task_name in corr_tasks: + fp32_performance = f"pearson/spearmanr:{result['pearson']}/{result['spearmanr']} corr:{result['corr']}" + fp32_score = result['corr']*100 + + if task_name in mcc_tasks: + fp32_performance = f"mcc:{result['mcc']}" + fp32_score = result['mcc'] + + if task_name == "mnli": + result = do_eval(teacher_model, 'mnli-mm', mm_eval_dataloader, + device, output_mode, mm_eval_labels, num_labels) + fp32_performance += f" mm-acc:{result['acc']}" + fp32_score = result['acc'] + fp32_performance = task_name +' fp32 ' + fp32_performance + + # ================================================================================ # + # Build Student Model + # ================================================================================ # + student_config = BertConfig.from_pretrained(args.student_model, + clip_val = args.clip_val, + quantize = args.quantize, + ffn_q_1 = args.ffn_1, + ffn_q_2 = args.ffn_2, + qkv_q = args.qkv, + emb_q = args.emb, + cls_q = args.cls, + mean_scale = args.mean_scale, + teacher_attnmap = args.teacher_attnmap, + teacher_context = args.teacher_context, + teacher_output = args.teacher_output, + ) + + student_model = QuantBertForSequenceClassification.from_pretrained(args.student_model, config = student_config, num_labels=num_labels) + student_model.to(device) + + # ================================================================================ # + # Training Setting + # ================================================================================ # + if n_gpu > 1: + student_model = torch.nn.DataParallel(student_model) + param_optimizer = list(student_model.named_parameters()) + + no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] + + optimizer_grouped_parameters = [ + {'params': [p for n, p in param_optimizer if not any(nd in n for nd in (no_decay))], 
'weight_decay': 0.01}, + {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}, + ] + + schedule = 'warmup_linear' + optimizer = BertAdam(optimizer_grouped_parameters, + schedule=schedule, + lr=args.learning_rate, + warmup=0.1, + t_total=num_train_optimization_steps) + + + norm_func = torch.linalg.norm + loss_cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6) + + global_step = 0 + best_dev_acc = 0.0 + previous_best = None + + # ================================================================================ # + # Training Start + # ================================================================================ # + + logger.info("***** Running training *****") + logger.info(" Num examples = %d", len(train_features)) + logger.info(" Batch size = %d", args.batch_size) + logger.info(" Num steps = %d", num_train_optimization_steps) + + # Loss Init AverageMeter + l_gt_loss = AverageMeter() + l_att_loss = AverageMeter() + l_rep_loss = AverageMeter() + l_cls_loss = AverageMeter() + l_output_loss = AverageMeter() + l_loss = AverageMeter() + + mixed_status = None + ce_loss_func = CrossEntropyLoss() + kl_loss = torch.nn.KLDivLoss(reduction="batchmean") + cos_loss_func = CosineEmbeddingLoss() + loss_mse = MSELoss() + + for epoch_ in range(int(num_train_epochs)): + + for batch in train_dataloader: + + # Gradual TI (You could try other TI options - Stochastic/Inverted) + if args.training_type == "qat_step1" and args.teacher_gradual: + if global_step < num_train_optimization_steps / 6: + student_config.teacher_output = True + mixed_status = "OI" + elif global_step < num_train_optimization_steps / 3: + student_config.teacher_output = False + student_config.teacher_context = True + mixed_status = "CI" + else: + student_config.teacher_output = False + student_config.teacher_context = False + student_config.teacher_attnmap = True + mixed_status = "MI" + + if args.training_type == "qat_step1" and args.teacher_stochastic: + rand_int = 
torch.randint(1,4,(1,))[0].item() + + if rand_int == 1 : + student_config.teacher_output = True + student_config.teacher_context = False + student_config.teacher_attnmap = False + mixed_status = "OI" + elif rand_int == 2 : + student_config.teacher_output = False + student_config.teacher_context = True + student_config.teacher_attnmap = False + mixed_status = "CI" + else: + student_config.teacher_output = False + student_config.teacher_context = False + student_config.teacher_attnmap = True + mixed_status = "MI" + + if args.training_type == "qat_step1" and args.teacher_inverted: + if global_step < num_train_optimization_steps / 3: + student_config.teacher_attnmap = True + mixed_status = "MI" + elif global_step < num_train_optimization_steps * 2/ 3: + student_config.teacher_attnmap = False + student_config.teacher_context = True + mixed_status = "CI" + else: + student_config.teacher_attnmap = False + student_config.teacher_context = False + student_config.teacher_output = True + mixed_status = "OI" + + student_model.train() + + batch = tuple(t.to(device) for t in batch) + input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch + + # tmp loss init + att_loss = 0. + rep_loss = 0. + cls_loss = 0. + attscore_loss = 0. + output_loss = 0. + loss = 0. 
+ + with torch.no_grad(): + teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_attn_blocks = teacher_model(input_ids, segment_ids, input_mask) + + student_logits, student_atts, student_reps, student_probs, student_attn_blocks = student_model(input_ids, segment_ids, input_mask, teacher_outputs=(teacher_probs, teacher_attn_blocks)) + + # We did not use GT-Loss for fair comparison to TernaryBERT QAT (note that GT-loss helps boosting resulting accuracy in some tasks) + if args.gt_loss: + if output_mode == "classification": + loss = ce_loss_func(student_logits, label_ids) + + elif output_mode == "regression": + loss = loss_mse(student_logits, teacher_logits) + + l_gt_loss.update(loss.item()) + + # Pred Loss (TernaryBERT Loss) + if args.pred_distill: + if output_mode == "classification": + cls_loss = soft_cross_entropy(student_logits,teacher_logits) + elif output_mode == "regression": + cls_loss = MSELoss()(student_logits, teacher_logits) + else: + cls_loss = soft_cross_entropy(student_logits,teacher_logits) + l_cls_loss.update(cls_loss.item()) + + # Output Loss + if args.output_distill: + for i, (student_attn_block, teacher_attn_block) in enumerate(zip(student_attn_blocks, teacher_attn_blocks)): + tmp_loss = MSELoss()(student_attn_block[1], teacher_attn_block[1]) # 1 : Attention Output 0 : Layer Context + output_loss += tmp_loss + l_output_loss.update(output_loss.item()) + + # Attention Score Loss (TernaryBERT Loss) + if args.attn_distill: + for i, (student_att, teacher_att) in enumerate(zip(student_atts, teacher_atts)): + + student_att = torch.where(student_att <= -1e2, torch.zeros_like(student_att).to("cuda"), + student_att) + teacher_att = torch.where(teacher_att <= -1e2, torch.zeros_like(teacher_att).to("cuda"), + teacher_att) + tmp_loss = MSELoss()(student_att, teacher_att) + attscore_loss += tmp_loss + l_att_loss.update(attscore_loss.item()) + + # Rep Distill (TernaryBERT Loss) + if args.rep_distill: + for i, (student_rep, teacher_rep) in 
enumerate(zip(student_reps, teacher_reps)): + tmp_loss = MSELoss()(student_rep, teacher_rep) + rep_loss += tmp_loss + l_rep_loss.update(rep_loss.item()) + + loss += cls_loss + rep_loss + output_loss + attscore_loss + l_loss.update(loss.item()) + + if n_gpu > 1: + loss = loss.mean() + + # Zero Step Loss Update + if global_step == 0: + if run is not None: + run["loss/total_loss"].log(value=l_loss.avg, step=global_step) + run["loss/gt_loss_loss"].log(value=l_gt_loss.avg, step=global_step) + run["loss/att_loss_loss"].log(value=l_att_loss.avg, step=global_step) + run["loss/rep_loss_loss"].log(value=l_rep_loss.avg, step=global_step) + run["loss/cls_loss_loss"].log(value=l_cls_loss.avg, step=global_step) + run["loss/output_loss_loss"].log(value=l_output_loss.avg, step=global_step) + + run["metrics/lr"].log(value=optimizer.get_lr()[0], step=global_step) + + loss.backward() + optimizer.step() + optimizer.zero_grad() + + global_step += 1 + # ================================================================================ # + # Evaluation + # ================================================================================ # + + if global_step % args.eval_step == 0 or global_step == num_train_optimization_steps-1: # period or last step + + student_model.eval() + + result = do_eval(student_model, task_name, eval_dataloader, + device, output_mode, eval_labels, num_labels, teacher_model=teacher_model) + + result['global_step'] = global_step + result['cls_loss'] = l_cls_loss.avg + result['att_loss'] = l_att_loss.avg + result['rep_loss'] = l_rep_loss.avg + result['loss'] = l_loss.avg + + # Basic Logging (Training Loss, Clip Val) + if run is not None: + + run["loss/total_loss"].log(value=l_loss.avg, step=global_step) + run["loss/gt_loss_loss"].log(value=l_gt_loss.avg, step=global_step) + run["loss/att_loss_loss"].log(value=l_att_loss.avg, step=global_step) + run["loss/rep_loss_loss"].log(value=l_rep_loss.avg, step=global_step) + run["loss/cls_loss_loss"].log(value=l_cls_loss.avg, 
step=global_step) + run["loss/output_loss_loss"].log(value=l_output_loss.avg, step=global_step) + run["metrics/lr"].log(value=optimizer.get_lr()[0], step=global_step) + + if task_name=='cola': + eval_score = result["mcc"] + if run is not None: + run["metrics/mcc"].log(value=result['mcc'], step=global_step) + + eval_result = result["mcc"] + # logger.info(f"Eval Result is {result['mcc']}") + elif task_name in ['sst-2','mnli','mnli-mm','qnli','rte','wnli']: + eval_score = result["acc"] + if run is not None: + run["metrics/acc"].log(value=result['acc'],step=global_step) + + logger.info(f"Eval Result is {result['acc']}") + eval_result = result["acc"] + elif task_name in ['mrpc','qqp']: + eval_score = result["acc_and_f1"] + if run is not None: + run["metrics/acc_and_f1"].log(value=result['acc_and_f1'],step=global_step) + + # logger.info(f"Eval Result is {result['acc']}, {result['f1']}") + eval_result = result["acc_and_f1"] + else: + eval_score = result["corr"] + if run is not None: + run["metrics/corr"].log(value=result['corr'],step=global_step) + + # logger.info(f"Eval Result is {result['corr']}") + eval_result = result["corr"] + + if args.training_type == "qat_step1": + logger.info(f"Gradual-{mixed_status}-{global_step}-SAVE : {eval_result*100}") + model_to_save = student_model.module if hasattr(student_model, 'module') else student_model + quant_model = copy.deepcopy(model_to_save) + + output_model_file = os.path.join(output_quant_dir, WEIGHTS_NAME) + output_config_file = os.path.join(output_quant_dir, CONFIG_NAME) + + torch.save(quant_model.state_dict(), output_model_file) + model_to_save.config.to_json_file(output_config_file) + tokenizer.save_vocabulary(output_quant_dir) + + # Save Model + save_model = False + + if task_name in acc_tasks and result['acc'] > best_dev_acc: + if task_name in ['sst-2','mnli','qnli','rte']: + previous_best = f"{result['acc']*100}" + elif task_name in ['mrpc','qqp']: + previous_best = f"{(result['f1'] + result['acc'])*50}" + best_dev_acc 
= result['acc'] + save_model = True + + if task_name in corr_tasks and result['corr'] > best_dev_acc: + previous_best = f"{result['corr']*100}" + best_dev_acc = result['corr'] + save_model = True + + if task_name in mcc_tasks and result['mcc'] > best_dev_acc: + previous_best = f"{result['mcc']*100}" + best_dev_acc = result['mcc'] + save_model = True + + if save_model: + # logger.info("====> Best Score *****") + # Test mnli-mm + if task_name == "mnli": + result = do_eval(student_model, 'mnli-mm', mm_eval_dataloader, + device, output_mode, mm_eval_labels, num_labels, teacher_model=teacher_model) + previous_best+= f"mm-acc:{result['acc']}" + + if args.training_type == "qat_step1": + logger.info(fp32_performance) + logger.info(previous_best) + + if args.save_fp_model: + # logger.info("***** Save full precision model *****") + model_to_save = student_model.module if hasattr(student_model, 'module') else student_model + output_model_file = os.path.join(output_dir, WEIGHTS_NAME) + output_config_file = os.path.join(output_dir, CONFIG_NAME) + + torch.save(model_to_save.state_dict(), output_model_file) + model_to_save.config.to_json_file(output_config_file) + tokenizer.save_vocabulary(output_dir) + + if args.save_quantized_model and not args.training_type == "qat_step1": + # logger.info("====> Save quantized model *****") + + # output_quant_dir = os.path.join(output_dir, 'quant') + output_quant_dir = os.path.join(output_dir, 'exploration') + if not os.path.exists(output_quant_dir): + os.mkdir(output_quant_dir) + + if not os.path.exists(output_quant_dir): + os.makedirs(output_quant_dir) + + output_quant_dir = os.path.join(output_quant_dir, args.exp_name) + if not os.path.exists(output_quant_dir): + os.makedirs(output_quant_dir) + + model_to_save = student_model.module if hasattr(student_model, 'module') else student_model + quant_model = copy.deepcopy(model_to_save) + + output_model_file = os.path.join(output_quant_dir, WEIGHTS_NAME) + output_config_file = 
os.path.join(output_quant_dir, CONFIG_NAME) + + torch.save(quant_model.state_dict(), output_model_file) + model_to_save.config.to_json_file(output_config_file) + tokenizer.save_vocabulary(output_quant_dir) + + + + # TI QAT Step-1 + if global_step >= num_train_optimization_steps and args.training_type == "qat_step1": + + if global_step >= ti_step_1_total_step: + logger.info(f"==> TI-step1 Last Result = {eval_result}") + best_txt = os.path.join(output_quant_dir, "best_info.txt") + with open(best_txt, "w") as f_w: + f_w.write(previous_best) + return + + logger.info(f"==> Previous Best = {previous_best}") + + # Save Best Score + if args.save_quantized_model: + best_txt = os.path.join(output_quant_dir, "best_info.txt") + last_txt = os.path.join(output_quant_dir, "last_info.txt") + with open(best_txt, "w") as f_w: + f_w.write(previous_best) + with open(last_txt, "w") as f_w: + f_w.write(f"{eval_result*100}") + # f_w.write(f"Last Result = {result}") + +if __name__ == "__main__": + main() diff --git a/notebooks/Attention_Output_Comp.ipynb b/notebooks/Attention_Output_Comp.ipynb new file mode 100644 index 0000000..7b4df9a --- /dev/null +++ b/notebooks/Attention_Output_Comp.ipynb @@ -0,0 +1,2127 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "a03d62c7", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from __future__ import absolute_import, division, print_function\n", + "\n", + "import pprint\n", + "import argparse\n", + "import logging\n", + "import os\n", + "import random\n", + "import sys\n", + "import pickle\n", + "import copy\n", + "import collections\n", + "import math\n", + "\n", + "import numpy as np\n", + "import numpy\n", + "import torch\n", + "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler,TensorDataset\n", + "\n", + "from torch.nn import CrossEntropyLoss, MSELoss\n", + "\n", + "from transformer import BertForSequenceClassification,WEIGHTS_NAME, CONFIG_NAME\n", + "from transformer.modeling_quant 
import BertForSequenceClassification as QuantBertForSequenceClassification\n", + "from transformer import BertTokenizer\n", + "from transformer import BertAdam\n", + "from transformer import BertConfig\n", + "from transformer import QuantizeLinear, QuantizeAct, BertSelfAttention, FP_BertSelfAttention, ClipLinear\n", + "from utils_glue import *\n", + "from bertviz import model_view\n", + "\n", + "from tqdm import tqdm\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import torch.nn.functional as F\n", + "\n", + "class AverageMeter(object):\n", + " \"\"\"Computes and stores the average and current value\"\"\"\n", + " def __init__(self):\n", + " self.reset()\n", + "\n", + " def reset(self):\n", + " self.val = 0\n", + " self.avg = 0 \n", + " self.sum = 0\n", + " self.count = 0\n", + "\n", + " def update(self, val, n=1):\n", + " self.val = val\n", + " self.sum += val * n\n", + " self.count += n\n", + " self.avg = self.sum / self.count\n", + "\n", + "def do_eval(model, task_name, eval_dataloader,\n", + " device, output_mode, eval_labels, num_labels, teacher_model=None):\n", + " eval_loss = 0\n", + " nb_eval_steps = 0\n", + " preds = []\n", + "\n", + " for batch_ in tqdm(eval_dataloader, desc=\"Inference\"):\n", + " batch_ = tuple(t.to(device) for t in batch_)\n", + " \n", + " with torch.no_grad():\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch_\n", + "\n", + " # teacher attnmap test\n", + " if teacher_model is not None:\n", + " \n", + " # logits, _, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids, segment_ids, input_mask)\n", + " \n", + " # # logits, _, _, _, _ = model(input_ids, segment_ids, input_mask, teacher_probs=teacher_probs)\n", + " # logits, _, _, _, _ = model(input_ids, segment_ids, input_mask, teacher_probs=(teacher_probs, teacher_values, teacher_reps))\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = 
teacher_model(input_ids, segment_ids, input_mask)\n", + " logits, student_atts, student_reps, student_probs, student_values = model(input_ids, segment_ids, input_mask, teacher_outputs=(teacher_probs, teacher_values, teacher_reps, teacher_logits, teacher_atts), output_mode=output_mode, seq_lengths=seq_lengths)\n", + " else:\n", + " logits, _, _, _, _ = model(input_ids, segment_ids, input_mask)\n", + " \n", + " # create eval loss and other metric required by the task\n", + " if output_mode == \"classification\":\n", + " loss_fct = CrossEntropyLoss()\n", + " tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))\n", + " elif output_mode == \"regression\":\n", + " loss_fct = MSELoss()\n", + " tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))\n", + "\n", + " eval_loss += tmp_eval_loss.mean().item()\n", + " nb_eval_steps += 1\n", + " if len(preds) == 0:\n", + " preds.append(logits.detach().cpu().numpy())\n", + " else:\n", + " preds[0] = np.append(\n", + " preds[0], logits.detach().cpu().numpy(), axis=0)\n", + "\n", + " eval_loss = eval_loss / nb_eval_steps\n", + "\n", + " preds = preds[0]\n", + " if output_mode == \"classification\":\n", + " preds = np.argmax(preds, axis=1)\n", + " elif output_mode == \"regression\":\n", + " preds = np.squeeze(preds)\n", + " result = compute_metrics(task_name, preds, eval_labels.numpy())\n", + " result['eval_loss'] = eval_loss\n", + " return result\n", + "\n", + "processors = {\n", + " \"cola\": ColaProcessor,\n", + " \"mnli\": MnliProcessor,\n", + " \"mnli-mm\": MnliMismatchedProcessor,\n", + " \"mrpc\": MrpcProcessor,\n", + " \"sst-2\": Sst2Processor,\n", + " \"sts-b\": StsbProcessor,\n", + " \"qqp\": QqpProcessor,\n", + " \"qnli\": QnliProcessor,\n", + " \"rte\": RteProcessor \n", + "}\n", + "\n", + "output_modes = {\n", + " \"cola\": \"classification\",\n", + " \"mnli\": \"classification\",\n", + " \"mrpc\": \"classification\",\n", + " \"sst-2\": \"classification\",\n", + " \"sts-b\": 
\"regression\",\n", + " \"qqp\": \"classification\",\n", + " \"qnli\": \"classification\",\n", + " \"rte\": \"classification\"\n", + "}\n", + "\n", + "default_params = {\n", + " \"cola\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\": 50}, # No Aug : 50 Aug : 400\n", + " \"mnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":8000},\n", + " \"mrpc\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"sst-2\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\":100},\n", + " \"sts-b\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"qqp\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"qnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"rte\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\": 20}\n", + " }\n", + "\n", + "def get_tensor_data(output_mode, features):\n", + " if output_mode == \"classification\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)\n", + " elif output_mode == \"regression\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)\n", + "\n", + "\n", + " all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long)\n", + " all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)\n", + " all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)\n", + " all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)\n", + " tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,all_label_ids, all_seq_lengths)\n", + " return tensor_data, all_label_ids\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "38659af1", + "metadata": {}, + "outputs": [], + "source": [ + "task_name = \"cola\"\n", + "bert_size = \"base\"\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else: \n", + " 
layer_num = 12\n", + " head_num = 12\n", + " \n", + "teacher_model = None\n", + "\n", + "# torch.cuda.empty_cache()\n", + "# !nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "id": "5b1f9450", + "metadata": {}, + "source": [ + "# Device & Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e4c445a3", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10/06 10:10:18 AM Writing example 0 of 1043\n", + "10/06 10:10:18 AM *** Example ***\n", + "10/06 10:10:18 AM guid: dev-0\n", + "10/06 10:10:18 AM tokens: [CLS] the sailors rode the breeze clear of the rocks . [SEP]\n", + "10/06 10:10:18 AM input_ids: 101 1996 11279 8469 1996 9478 3154 1997 1996 5749 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "10/06 10:10:18 AM input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "10/06 10:10:18 AM segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "10/06 10:10:18 AM label: 1\n", + "10/06 10:10:18 AM label_id: 1\n", + "input_ids : tensor([[ 101, 2198, 2001, 7167, 2062, 27885, 3630, 25171, 2084, 5965,\n", + " 1012, 102]])\n", + "tokens : ['[CLS]', 'john', 'was', 'lots', 'more', 'ob', '##no', '##xious', 'than', 'fred', '.', '[SEP]']\n", + "A : john was lots more ob ##no ##xious than fred . 
\n", + "B : \n", + "tensor([11])\n", + "PUNC\n", + "comma -> tensor([], dtype=torch.int64)\n", + "period -> tensor([10])\n" + ] + } + ], + "source": [ + "device = \"cpu\"\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n", + "\n", + "# Processor & Task Info\n", + "processor = processors[task_name]()\n", + "output_mode = output_modes[task_name]\n", + "label_list = processor.get_labels()\n", + "num_labels = len(label_list)\n", + "\n", + "if task_name in default_params:\n", + " batch_size = default_params[task_name][\"batch_size\"]\n", + " max_seq_length = default_params[task_name][\"max_seq_length\"]\n", + " eval_step = default_params[task_name][\"eval_step\"]\n", + " \n", + "# Tokenizer\n", + "tokenizer = BertTokenizer.from_pretrained(teacher_model_dir, do_lower_case=True)\n", + "\n", + "\n", + "# Load Dataset\n", + "data_dir = os.path.join(\"data\",task_name)\n", + "processed_data_dir = os.path.join(data_dir,'preprocessed')\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)\n", + "eval_features = convert_examples_to_features(eval_examples, label_list, max_seq_length, tokenizer, output_mode)\n", + "# dev_file = train_file = os.path.join(processed_data_dir,'dev.pkl') \n", + "# eval_features = pickle.load(open(dev_file,'rb'))\n", + "\n", + "eval_data, eval_labels = get_tensor_data(\"regression\", eval_features)\n", + "eval_sampler = SequentialSampler(eval_data)\n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=1)\n", + "eval_data, eval_labels = get_tensor_data(output_mode, eval_features)\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)\n", + "\n", + "# Sampling Sentence \n", + "i = 0 \n", + "# num = 3\n", + "num = 12\n", + "\n", + "for step, batch in 
enumerate(eval_dataloader):\n", + " # model.train()\n", + " \n", + " batch = tuple(t.to(device) for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch\n", + " seq_length = seq_lengths[0]\n", + " i = i + 1\n", + " if i == num:\n", + " break\n", + "\n", + "input_ids_sliced = input_ids[:,:seq_length]\n", + "input_id = []\n", + "for i in input_ids_sliced[0]:\n", + " input_id.append(i.item())\n", + "tokens = tokenizer.convert_ids_to_tokens(input_id)\n", + "\n", + "\n", + "sample_sentence_a = str()\n", + "sample_sentence_b = str()\n", + "index = 0\n", + "\n", + "for i, word in enumerate(tokens[1:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_a += word\n", + " sample_sentence_a += \" \"\n", + "index = i\n", + "\n", + "for i, word in enumerate(tokens[index+2:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_b += word\n", + " sample_sentence_b += \" \"\n", + "\n", + "sep_index = torch.where(input_ids[0] == 102)[0]\n", + "\n", + "punc_index_1 = torch.where(input_ids[0] == 1010)[0] # comma\n", + "punc_index_2 = torch.where(input_ids[0] == 1012)[0] # period\n", + "punc_index = torch.cat([punc_index_1, punc_index_2])\n", + "\n", + "if len(sample_sentence_b) > 1:\n", + " sample_sentence_b_start = segment_ids[0].tolist().index(1)\n", + "else:\n", + " sample_sentence_b_start = None\n", + "\n", + "print(f\"input_ids : {input_ids_sliced}\")\n", + "print(f\"tokens : {tokens}\")\n", + "print(f\"A : {sample_sentence_a}\")\n", + "print(f\"B : {sample_sentence_b}\")\n", + "print(sep_index)\n", + "print(\"PUNC\")\n", + "print(f\"comma -> {punc_index_1}\")\n", + "print(f\"period -> {punc_index_2}\")\n", + "\n", + "for i, token in enumerate(tokens):\n", + " tokens[i] = token # + \"_\" + str(i)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1673, + "id": "c8a51eb6", + "metadata": {}, + "outputs": [], + "source": [ + "def ranking_loss_func(student_probs, teacher_probs):\n", + " Loss_ranking = 
0\n", + "\n", + " loss_ranking_list = []\n", + "\n", + " for l in tqdm(range(layer_num)):\n", + " for h in range(head_num):\n", + " student_prob_plt = student_probs[l][0,h,:,:]\n", + " teacher_prob_plt = teacher_probs[l][0,h,:,:]\n", + " Loss_ranking = 0\n", + " for h in range(seq_length):\n", + " for idx in range(0, seq_length-1):\n", + " for jdx in range(1, seq_length):\n", + " p = (student_prob_plt[h][idx] - student_prob_plt[h][jdx])*(torch.sgn(teacher_prob_plt[h][idx] - teacher_prob_plt[h][jdx]))\n", + " # print(max(0, - p.item()))\n", + " Loss_ranking += max(0, - p.item())\n", + " loss_ranking_list.append(Loss_ranking)\n", + " return loss_ranking_list" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "12432934", + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"cpu\")\n", + "\n", + "teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + "teacher_model.to(device)\n", + "teacher_model.eval()\n", + "\n", + "\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_zip = teacher_model(input_ids_sliced.to(device))\n", + "teacher_outputs = (teacher_probs, teacher_zip)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e98f7365", + "metadata": {}, + "outputs": [], + "source": [ + "file_name= \"step_2_base_S_O_1_MIXED\"\n", + "student_model_dir = os.path.join(\"output\", task_name, \"exploration\", file_name)\n", + "student_config_OI = BertConfig.from_pretrained(student_model_dir)\n", + "# student_config_OI.teacher_attnmap = False\n", + "student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config_OI, num_labels=num_labels)\n", + "\n", + "model_2_outputs = student_model(input_ids_sliced.to(device), teacher_outputs=teacher_outputs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8aeea219", + "metadata": {}, + "outputs": [], + "source": [ + "file_name= 
\"step_2_base_S_O_1_OI\"\n", + "student_model_dir = os.path.join(\"output\", task_name, \"exploration\", file_name)\n", + "student_config_MIXED = BertConfig.from_pretrained(student_model_dir)\n", + "# student_config_MIXED.teacher_attnmap = False\n", + "# student_config_MIXED.teacher_output = False\n", + "student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config_MIXED, num_labels=num_labels)\n", + "\n", + "model_3_outputs = student_model(input_ids_sliced.to(device), teacher_outputs=teacher_outputs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "508844e8", + "metadata": {}, + "outputs": [], + "source": [ + "file_name= \"step_2_base_S_O_1_MI\"\n", + "student_model_dir = os.path.join(\"output\", task_name, \"exploration\", file_name)\n", + "student_config_MIXED = BertConfig.from_pretrained(student_model_dir)\n", + "# student_config_MIXED.teacher_attnmap = False\n", + "# student_config_MIXED.teacher_output = False\n", + "student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config_MIXED, num_labels=num_labels)\n", + "\n", + "model_4_outputs = student_model(input_ids_sliced.to(device), teacher_outputs=teacher_outputs)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "7049a495", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "\n", + "\n", + "# student_logits, student_atts, student_reps_1, student_probs_1, student_zip_1 = model_1_outputs\n", + "student_logits, student_atts, student_reps_2, student_probs_2, student_zip_2 = model_2_outputs\n", + "student_logits, student_atts, student_reps_3, student_probs_3, student_zip_3 = model_3_outputs\n", + "student_logits, student_atts, student_reps_4, student_probs_4, student_zip_4 = model_4_outputs\n", + "\n", + "# model_outputs = (model_1_outputs, model_2_outputs, model_3_outputs, model_4_outputs)\n", + "model_outputs = (model_2_outputs, model_3_outputs, 
model_4_outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "3fc82e4c", + "metadata": {}, + "outputs": [], + "source": [ + "mag_dict = dict()\n", + "\n", + "for model_num in range(4):\n", + " for l in range(layer_num): \n", + " mag_dict[f\"{model_num}_min_{l}\"] = []; mag_dict[f\"{model_num}_max_{l}\"] = []; mag_dict[f\"{model_num}_std_{l}\"] = []; mag_dict[f\"{model_num}_mean_{l}\"] = []\n", + " if model_num == 0:\n", + " mag_dict[f\"tc_min_{l}\"] = []; mag_dict[f\"tc_max_{l}\"] = []; mag_dict[f\"tc_std_{l}\"] = []; mag_dict[f\"tc_mean_{l}\"] = []" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "f427f4e7", + "metadata": {}, + "outputs": [], + "source": [ + "model_num = 0\n", + "mse_func = MSELoss()\n", + "cos_func = torch.nn.CosineSimilarity(dim=-1)\n", + "\n", + "for model_output in model_outputs:\n", + " \n", + " student_logits, student_atts, student_reps, student_probs, student_zip = model_output\n", + " mag_dict[f\"{model_num}_ffn_mse\"] = []; mag_dict[f\"{model_num}_attn_cos\"] = []; mag_dict[f\"{model_num}_attn_mse\"] = []; mag_dict[f\"{model_num}_ffn_cos\"] = []\n", + " for l in range(layer_num):\n", + " tc_attn_context, tc_attn_output, tc_sa_output = teacher_zip[l]\n", + " st_attn_context, st_attn_output, st_sa_output = student_zip[l] \n", + " st_ffn_output = student_reps[l+1]\n", + " tc_ffn_output = teacher_reps[l+1]\n", + "\n", + " tc_output = tc_ffn_output\n", + " st_output = st_ffn_output\n", + " \n", + " # min-max\n", + " for token in range(len(tokens)):\n", + " if model_num == 0:\n", + " mag_dict[f\"tc_min_{l}\"].append(tc_output[0,token,:].min().item())\n", + " mag_dict[f\"tc_max_{l}\"].append(tc_output[0,token,:].max().item())\n", + " mag_dict[f\"tc_mean_{l}\"].append(tc_output[0,token,:].mean().item())\n", + " mag_dict[f\"tc_std_{l}\"].append(tc_output[0,token,:].std().item())\n", + " \n", + "# mag_dict[f\"tc_min_{l}\"].append(tc_output[0,:,token,:].min().item())\n", + "# 
mag_dict[f\"tc_max_{l}\"].append(tc_output[0,:,token,:].max().item())\n", + "# mag_dict[f\"tc_mean_{l}\"].append(tc_output[0,:,token,:].mean().item())\n", + "# mag_dict[f\"tc_std_{l}\"].append(tc_output[0,:,token,:].std().item())\n", + "\n", + " mag_dict[f\"{model_num}_min_{l}\"].append(st_output[0,token,:].min().item())\n", + " mag_dict[f\"{model_num}_max_{l}\"].append(st_output[0,token,:].max().item())\n", + " mag_dict[f\"{model_num}_mean_{l}\"].append(st_output[0,token,:].mean().item())\n", + " mag_dict[f\"{model_num}_std_{l}\"].append(st_output[0,token,:].std().item())\n", + "\n", + "# mag_dict[f\"{model_num}_min_{l}\"].append(st_output[0,:,token,:].min().item())\n", + "# mag_dict[f\"{model_num}_max_{l}\"].append(st_output[0,:,token,:].max().item())\n", + "# mag_dict[f\"{model_num}_mean_{l}\"].append(st_output[0,:,token,:].mean().item())\n", + "# mag_dict[f\"{model_num}_std_{l}\"].append(st_output[0,:,token,:].std().item()) \n", + " \n", + " \n", + " \n", + " mse_attn_diff = mse_func(st_attn_output[0,:,:], tc_attn_output[0,:,:]).item()\n", + " cos_attn_diff = torch.mean((1-cos_func(st_attn_output[0,:,:], tc_attn_output[0,:,:]))).item()\n", + " mag_dict[f\"{model_num}_attn_mse\"].append(mse_attn_diff)\n", + " mag_dict[f\"{model_num}_attn_cos\"].append(cos_attn_diff)\n", + "\n", + " mse_ffn_diff = mse_func(student_reps[l+1][0,:,:], teacher_reps[l+1][0,:,:]).item()\n", + " cos_ffn_diff = torch.mean((1-cos_func(student_reps[l+1][0,:,:], teacher_reps[l+1][0,:,:]))).item()\n", + " mag_dict[f\"{model_num}_ffn_mse\"].append(mse_ffn_diff)\n", + " mag_dict[f\"{model_num}_ffn_cos\"].append(cos_ffn_diff)\n", + "\n", + " model_num += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 1491, + "id": "83afa218", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor(0.0517, grad_fn=)" + ] + }, + "execution_count": 1491, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "st_map = student_probs[0]\n", + "tc_map = 
teacher_probs[0]\n", + "\n", + "kl_loss = torch.nn.KLDivLoss(reduction=\"batchmean\")\n", + "\n", + "kl_loss(st_map[0,0,:,:].log(), tc_map[0,0,:,:])" + ] + }, + { + "cell_type": "code", + "execution_count": 1680, + "id": "2aff2294", + "metadata": {}, + "outputs": [], + "source": [ + "model_num = 0\n", + "mse_func = MSELoss()\n", + "cos_func = torch.nn.CosineSimilarity(dim=-1)\n", + "kl_loss = torch.nn.KLDivLoss(reduction=\"batchmean\")\n", + "\n", + "context_similarity = torch.randn(2,layer_num, head_num)\n", + "cont_dict = dict()\n", + "cont_dict[\"0_cont\"] = []\n", + "cont_dict[\"1_cont\"] = []\n", + "\n", + "cont_dict[\"0_sa\"] = []\n", + "cont_dict[\"1_sa\"] = []\n", + "\n", + "map_similarity = torch.randn(2,layer_num, head_num)\n", + "\n", + "for model_output in model_outputs:\n", + " \n", + " student_logits, student_atts, student_reps, student_probs, student_zip = model_output\n", + " for l in range(layer_num):\n", + " tc_attn_context, tc_attn_output, tc_sa_output = teacher_zip[l]\n", + " st_attn_context, st_attn_output, st_sa_output = student_zip[l] \n", + " st_ffn_output = student_reps[l+1]\n", + " tc_ffn_output = teacher_reps[l+1]\n", + " \n", + " sa_diff = mse_func(tc_sa_output, st_sa_output)\n", + " cont_dict[f\"{model_num}_sa\"].append(sa_diff.item())\n", + "\n", + " st_map = student_probs[l]\n", + " tc_map = teacher_probs[l]\n", + "\n", + " tc_output = tc_attn_context\n", + " st_output = st_attn_context\n", + " \n", + " for h in range(head_num):\n", + " cos_sim = cos_func(tc_output[0,h,:,:], st_output[0,h,:,:]).mean()\n", + " context_similarity[model_num][l,h] = 1 - cos_sim\n", + " \n", + " cont_dict[f\"{model_num}_cont\"].append((1-cos_sim).item())\n", + " \n", + " map_sim = kl_loss(st_map[0,h,:,:].log(), tc_map[0,h,:,:])\n", + " map_similarity[model_num][l,h] = map_sim\n", + " \n", + " model_num += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "a7e6c1eb", + "metadata": {}, + "outputs": [], + "source": [ + "mag_dict = 
torch.load(\"sts-b-step-2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "5ed36361", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "lw = 4.5\n", + "ms = 13\n", + "layer_num = 12\n", + "label_1 = \"TI-M\"\n", + "label_2 = \"TI-O\"\n", + "label_3 = \"TI-G\"\n", + "label_4 = \"Q\"\n", + "sub = \"attn\"\n", + "\n", + "fig, ax1 = plt.subplots(1, 1, figsize=(5, 4), dpi=120)\n", + "plt.grid(axis=\"y\")\n", + "ax1.plot(list(range(layer_num)), mag_dict[f\"0_{sub}_mse\"], label=label_1, color=\"tab:red\", linewidth=lw, marker=\"o\", markersize=ms)\n", + "ax1.plot(list(range(layer_num)), mag_dict[f\"1_{sub}_mse\"], label=label_2, color=\"darkgoldenrod\", linewidth=lw, marker=\"^\", markersize=ms)\n", + "ax1.plot(list(range(layer_num)), mag_dict[f\"2_{sub}_mse\"], label=label_3, color=\"tab:blue\", linewidth=lw, marker=\"D\", markersize=ms)\n", + "# ax1.plot(list(range(layer_num)), mag_dict[f\"3_{sub}_mse\"], label=label_4, color=\"tab:orange\", linewidth=lw, marker=\"D\", markersize=ms)\n", + "# ax1.set_title(f\"Attention Output Loss per-layer\", fontsize=20)\n", + "\n", + "# fig, ax2 = plt.subplots(1, 1, figsize=(12, 7), dpi=50)\n", + "# sub = \"ffn\"\n", + "# plt.grid(axis=\"y\")\n", + "# # ax2.plot(list(range(layer_num)), mag_dict[f\"3_{sub}_mse\"], label=label_4, color=\"dimgray\", linewidth=lw, marker=\"D\", markersize=ms)\n", + "# ax2.plot(list(range(layer_num)), mag_dict[f\"0_{sub}_mse\"], label=label_1, color=\"tab:blue\", linewidth=lw, marker=\"^\", markersize=ms)\n", + "# ax2.plot(list(range(layer_num)), mag_dict[f\"1_{sub}_mse\"], label=label_2, color=\"darkblue\", linewidth=lw, marker=\"o\", markersize=ms)\n", + "# ax2.plot(list(range(layer_num)), mag_dict[f\"2_{sub}_mse\"], label=label_3, color=\"tab:blue\", linewidth=lw, marker=\"D\", markersize=ms)\n", + "\n", + "# ax2.set_title(f\"Attention {sub} Output per-layer\", fontsize=20)\n", + "\n", + "# ax2.plot(list(range(layer_num)), mag_dict[f\"0_{sub}_cos\"], label=f\"MI-COS\", color=\"tab:red\", linewidth=lw, marker=\"o\", markersize=ms)\n", + "# 
ax2.plot(list(range(layer_num)), mag_dict[f\"1_{sub}_cos\"], label=f\"OI-COS\", color=\"darkgoldenrod\", linewidth=lw, marker=\"^\", markersize=ms)\n", + "# ax2.plot(list(range(layer_num)), mag_dict[f\"2_{sub}_cos\"], label=f\"MIXED-COS\", color=\"tab:blue\", linewidth=lw, marker=\"D\", markersize=ms)\n", + "\n", + "# ax3.set_title(f\"FFN Output Layer {l}\", fontsize=20)\n", + "# ax3.plot(list(range(len(tokens))), mag_dict[f\"0_ffn_mse_{l}\"], label=\"1SB_M_ffn_mse\", color=\"orange\", linewidth=2.5)\n", + "# ax3.plot(list(range(len(tokens))), mag_dict[f\"1_ffn_mse_{l}\"], label=\"1SB_O_ffn_mse\", color=\"dodgerblue\", linewidth=2.5)\n", + "\n", + "# ax4.plot(list(range(len(tokens))), mag_dict[f\"0_ffn_cos_{l}\"], label=\"1SB_M_ffn_cos\", color=\"orange\", linewidth=2.5)\n", + "# ax4.plot(list(range(len(tokens))), mag_dict[f\"1_ffn_cos_{l}\"], label=\"1SB_O_ffn_cos\", color=\"dodgerblue\", linewidth=2.5)\n", + "\n", + "fs=18\n", + "ax1.legend(loc=2, fontsize=fs)\n", + "# ax2.legend(loc=2, fontsize=18)\n", + "ax1.set_xlabel(\"Layer Number\", fontsize=fs)\n", + "ax1.set_ylabel(f\"MSE Loss\", fontsize=fs)\n", + "ax1.tick_params(axis=\"x\", labelsize=fs)\n", + "# ax2.tick_params(axis=\"x\", labelsize=22)\n", + "ax1.tick_params(axis=\"y\", labelsize=fs)\n", + "# ax2.tick_params(axis=\"y\", labelsize=22)\n", + "\n", + " # ax3.legend(loc=2, fontsize=15)\n", + " # ax4.legend(loc=2, fontsize=15)\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1681, + "id": "c29d4dc1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1681, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmYAAAFECAYAAACNoPIqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAArEAAAKxAFmbYLUAABb50lEQVR4nO3deVyU9d4//tc1KwMDwybojLiCqIjihrigWZaCWFp5XFIjo3PufnU29W75ntNZO6fTSe963Kf7Pt1HXMpKyzqZIZhW7oZLqWTuCwqDguwMMPv1+2NgZAQVleEa4PV8PHw087muuXhfczz68rNdgiiKIoiIiIhIcjKpCyAiIiIiFwYzIiIiIh/BYEZERETkIxjMiIiIiHwEgxkRERGRj2AwIyIiIvIRkgezrKwsxMbGIiYmBpmZmR7H6urqkJKSgoEDByIuLg7/+Mc/3MeWLVuG2NhYxMfHY/HixbDb7QCAnTt3Ijg4GAkJCUhISMCbb77ZrvdDREREdLcEKfcxs9vtGDx4MHbs2AGdToeRI0di//79CAsLA+AKZocOHcKkSZNgMpkwatQoZGVlITo6Gtu3b8fkyZMhl8uxYMECPPDAA1i8eDF27tyJt99+G5988kmr64iMjETfvn29dZtEREREbhcvXkRxcXGLxxTtXIuHgwcPIi4uDgaDAQCQkpKCbdu2Yd68eQAAf39/TJo0CQCg1WoRGxuLK1euIDo6Gg8++KD7OqNGjYLRaLzrOvr27Yvc3Nx7uBMiIiKi1klKSrrpMUmHMouKityhDAAMBsNNA1ZBQQHy8vIwYsQIj3a73Y4PP/wQDz30kLtt586dGDZsGB5++GGcO3euxetlZmYiKSkJSUlJKCkpaYO7ISIiIro3ks8xaw2LxYI5c+bgjTfeQEBAgMex//zP/0RSUhLGjBkDABgxYgTy8/Nx7NgxPPXUU+7etxtlZGQgNzcXubm5iIiI8Po9EBEREd2OpMFMr9d79JAZjUbo9XqPc0RRxKJFi5CamorHH3/c49j//u//4uTJkx4T/IOCgqDVagEAs2bNwuXLl+FwOLx4F0RERERtQ9JglpiYiOPHj8NoNMJkMiEnJwdTp071OOfll1+Gv78/fvvb33q0b9myBZmZmfj444+hUFyfKtd0Mt3evXvRrVs3yOVy794IERERURuQdPK/QqHAihUrMHnyZDidTrzwwgsICwtDamoqMjMz4XQ68frrr2Pw4MFISEgAALz++uuYOnUqfvnLX8Jms2HixIkAgNmzZ+M3v/kNPv74Y7zzzjtQKpXQarVYt26dhHdIRERE1HqSbpfhK5KSkrgqk4iIiNrFrXJHh5j8T0RERNQVMJgRERER+QhJ55h1NBaLBSUlJTCbzVzp6SVyuRx+fn6IiIiAWq2WuhwiIupCKjdtAkRAmzwBivBwSWpgMGul6upqFBcXIzw8HD169IBcLocgCFKX1amIogiHw4GamhpcvnwZkZGRCAoKkrosIiLqAkSbDVWbPoezpgYVH3yAnv/7P1BKsM8pg1krlZaWwmAwwN/fX+pSOi1BEKBQKBASEgK1Wo2rV68ymBERUbuoP3oUzpoaAICqb19JQhnAOWatZrVaodFopC6jy9BoNLBarVKXQUREXYRpz173a23DVlxSYDC7Axy6bD/8romIqL046+pQd/Cg641MhoAJ4yWrhcGMiIiIurTa3AMQbTYAgCY+HoqQEMlqYTAjIiKiLs20Z7f7dcDEZAkrYTAjIiKiLsxeVgbzD8cBAIJKhYAxYySth8GMiIiIuqzavXuBhqdT+icmQibxQj8GM2qV/Px8CIIAQRDQvXt32O32Fs87efKk+7w+ffq429euXQtBEPC3v/2tnSomIiK6PdPuPe7X2knSrcZsxGBGd0ShUK
C4uBjZ2dktHl+1ahVkMhlkMv7WIiIi32a9fBnW/HwAgFwXBM3QodIWBAYzukPjxo2DTqfD6tWrmx2z2+14//33MWXKFCiVSgmqIyIiar2mvWUB48dDUEi/7z6DGd0RjUaDuXPnYsuWLSgpKfE4lpWVheLiYixevFii6oiIiFpHdDo9V2MmS7sasxGDGd2xxYsXw263Y926dR7tq1evRmhoKGbOnClNYURERK1kPnkSjtIyAICiR3eoY2IkrshF+j67TuCJzFxYbE6py7gltVKGDzKS2uRaiYmJGDJkCNasWYOlS5cCAK5evYqcnBw8++yzUKvVbfJziIiIvKV29/XeMm3yRJ954gyDWRuw2Jyw2H07mLW1xYsXY8mSJThw4ADGjBmDd999F3a7ncOYRETk80SrFbX7v3W/10q8qWxTDGZtQK30/RHhtq5xwYIFePHFF7F69WqMGTMGa9aswfDhw5GQkNCmP4eIiKit1X3/PZx1dQAA9YABUPboIXFF1zGYtYG2GiLsSLp164YZM2Zgw4YNmD17Nk6fPo1//OMfUpdFRER0Wx57l/lQbxnAyf90D55++mlUV1cjPT0dfn5+eOKJJ6QuiYiI6JYcJhPqv/vO9UYmQ8C4cdIWdAMGM7prU6dOhcFggNFoxMyZMxESEiJ1SURERLdUu38/xIan12iGJ0Cu00lckSfJg1lWVhZiY2MRExODzMxMj2N1dXVISUnBwIEDERcX5zFUVlpaismTJyMmJgaPPvoozGYzAMBsNuPRRx9FTEwMJk+ejNLS0na9n65ELpdj06ZN+Oyzz/Daa69JXQ4REdFt1e5pOow5ScJKWiZpMLPb7ViyZAm++eYbHDlyBG+88QbKyso8znnppZdw6tQpHDhwAP/zP/+Dc+fOAQD+9re/4bHHHsPZs2fRr18/d6jLzMxEv379cPbsWTz22GN8NqOXjRo1CjNnzvR4LiYREZEvspWUwHziJABA0PjBf/QoiStqTtJgdvDgQcTFxcFgMECr1SIlJQXbtm1zH/f398ekSa40q9VqERsbiytXrgAANm/ejIULFwJwrRD84osvbtlOREREXVvtnr3u1wFjkiDzwX03JV2VWVRUBIPB4H7fOF+pJQUFBcjLy8OIESMAAFVVVdA1jAs3/VzTawYHB6OysrLF62VmZrp72W58tBA116dPH4ii2OrzG4eWG6WnpyM9Pb2NqyIiImodURRharqp7KSJElZzc5LPMWsNi8WCOXPm4I033kBAQECbXDMjIwO5ubnIzc1FREREm1yTiIiIfJP1Yj5shYUAAHlICPyGDJG4opZJGsz0er1HD5nRaIRer/c4RxRFLFq0CKmpqXj88cfd7TqdDlVVVc0+1/SalZWVCA4O9vJdEBERka8z7d7lfh0wYTwEmW/2TUlaVWJiIo4fPw6j0QiTyYScnBxMnTrV45yXX34Z/v7++O1vf+vRnpaW5n6I9vvvv48ZM2a02J6WltYOd0JERES+SnQ6PeaX+eJqzEaSBjOFQoEVK1Zg8uTJSEhIwNKlSxEWFobU1FQUFRWhsLAQr7/+Og4ePIiEhAQkJCTgyy+/BOAKbBs3bkR0dDTOnTuHjIwMAMAzzzyDc+fOITo6Ghs3bsRLL70k5S0SERGRxMw//ABHw5xzZVQUVH37SFrPrUj+SKaHH34YDz/8sEdbdna2+/XNJpx369YNu3btatau0WiwadOmNq2RiIiIOi6PRzAlJ0MQBAmruTXfHGAlIiIiagNOiwW1B3Ld7wOSJ0hYze0xmBEREVGnVXfoMMR61xZOfoMHQenjOzEwmBEREVGn5bEaMzlZwkpah8GMiIiIOiVHVRXqjxwFAAgKBQLGjZO2oFZgMCMiIqJOqXb/fsDpBABoRo6EXKuVuKLbYzAjIiKiTsm0q8kjmCb6/jAmwGBGREREnZDtyhVYzp4FAMgCAuDf8KxtX8dgRkRERJ1O073LAsYmQVCpJKym9RjMiIiIqFMRRR
GmPdeHMQMmTpSwmjvDYEZ3bMeOHZgzZw6ioqKgVqsRGhqKCRMm4M0334TZbG52/n333QdBEHD16lUJqiUioq7GcvYs7Fdcf+fIw8PgN2iQxBW1HoMZtZrdbsfPfvYz3H///diyZQuSkpKwZMkSzJ07F1evXsWSJUswbNgwnDt3TupSiYioC6vd3WTSf/JECLKOE3ckf1YmdRwvv/wy/vWvf2H06NH47LPPYDAY3MccDgf+9Kc/4U9/+hOmTZuG77//HkFBQRJWS0REXZFot8O0b5/7vXZSxxnGBNhjRq105swZ/Nd//RdCQ0PxxRdfeIQyAJDL5fjjH/+I+fPn4/z581i+fLlElRIRUVdWn5cHZ3UNAEDVpw9UUVESV3RnGMyoVd599104nU789Kc/RWRk5E3Pe+WVVwAAq1evbq/SiIiI3Dri3mVNcSizDVx66imIFqvUZdySoFah95o1d/35/fv3AwAeeOCBW543cOBA6PV6GI1GFBQUIKqD/UuFiIg6Lmd9PeoOHnS9EQQETJggbUF3gcGsDYgWK0SLReoyvKpxRWVrglZUVBSKiopw5coVBjMiImo3tQcOQLS6Okr84odAERYmcUV3jsGsDQhq39+0riPUSEREdC9uXI3ZETGYtYF7GSLsKLp3745Tp06hoKAAsbGxtzy3oKAAANCjR4/2KI2IiAj2igrU/3AcACAolQhIGiNxRXeHk/+pVcaNGwcA+Prrr2953qlTp1BUVASDwcBhTCIiaje1e/cBTicAwD8xETJ/f4krujsMZtQqixYtgkwmw8qVK3Ht2rWbnveXv/wFALB48eL2Ko2IiAgmj2HMjjfpvxGDGbVKbGwsfvnLX6KsrAwzZszAlStXPI47nU78+c9/xvvvv4/+/ftj2bJlElVKRERdjbWgANYLFwAAssBAaBISpC3oHnCOGbXa3//+d1RVVWH16tWIiYnB9OnT0b9/f1RXV2Pbtm04e/YsYmJikJ2dzV3/iYio3Zj27HG/Dhg/DoJSKWE190byHrOsrCzExsYiJiYGmZmZzY4/99xziIyMxKhRozzak5OTkZCQgISEBHTr1g2/+tWvAABr165FRESE+9hHH33UHrfRJSgUCqxatQrbt29Hamoq9u7di+XLl+ODDz5AeHg4VqxYgWPHjiE6OlrqUomIqIsQnU7U7r4ezLQTO+ZqzEaS9pjZ7XYsWbIEO3bsgE6nw8iRIzFr1iyENdl3ZP78+Vi8eDF+9rOfeXx2T5N0PGHCBMycOdP9ftGiRXwkkBdNmTIFU6ZMafX5O3fu9F4xRETUpVlOnYK9Ye6zIjIS6gEDJK7o3kjaY3bw4EHExcXBYDBAq9UiJSUF27Zt8zhn/PjxHkHtRkajERcvXsTEDp6QiYiI6M6ZbugtEwRBwmrunaTBrHFbhUYGgwFGo/GOrrFx40Y89thjkMmu38r69esxdOhQzJ8/H8XFxS1+LjMzE0lJSUhKSkJJScnd3QARERFJRrRaUfvtt+73HfHZmDeSfI7Zvfr4448xZ84c9/sZM2bgwoULyMvLQ2JiIp5//vkWP5eRkYHc3Fzk5uYiIiKivcolIiKiNlJ35CicJhMAQB3dH0q9XuKK7p2kwazxYdeNjEYj9HfwpV6+fBmFhYXuzU8BICwsDGq1GgDwzDPP4NChQ21XMBEREfkM0+5d7tcBnWRKk6TBLDExEcePH4fRaITJZEJOTg6mTp3a6s9v3LgRs2fP9hhPbnzYNgBs2rQJcXFxbVozERERSc9hqkX94e9cb2QyaMePl7agNiJpMFMoFFixYgUmT56MhIQELF26FGFhYUhNTUVRUREAID09HWPHjkVeXh569uyJjRs3uj//8ccf4yc/+YnHNd966y0MGTIEw4YNw7vvvou33367Xe+JiIiIvK8u91uIdjsAQDNsGOTBwdIW1EYEURRFqYuQWlJSEnJzc295zqlTpxAbG9vhV3t0FKIo4vTp0xg4cKDUpRARkQ+68s
rvYD5xAgDQ7Ve/hDa540z8v1Xu6PCT/9uLXC6HvSGZk/fZ7XbI5XKpyyAiIh9kv3bNHcoEPz/4jx4tcUVth8GslbRaLaqrq6Uuo8uoqqqCVquVugwiIvJBpr373K8DxiRC5ucnYTVti8GslcLCwlBeXo7S0lLYbDZwBLjtiaIIm82G0tJSVFRU3HJjYSIi6ppEUYRpV5PVmB1oCLM1+BDzVlKpVOjduzfKysqQn58Ph8MhdUmdklwuh1arRe/evaFSqaQuh4iIfIw1Px+2ggIAgFyng2boUIkralsMZndApVKhR48eUpdBRETUZdU2eVZ2QHIyhE42H5lDmURERNQhiE4nTHv2ut93hkcw3YjBjIiIiDoE848/wlFeDgBQGgxQ9esncUVtj8GMiIiIOgTT7t3u19qJyZ1yb1EGMyIiIvJ5TqsVtd9+637f2VZjNmIwIyIiIp9Xd+gQxHozAEA9MBbKyEiJK/IOBjMiIiLyebW7r6/G1E6cJGEl3sVgRkRERD7NUV2NuiNHXG8UcgSMGyttQV7EYEZEREQ+rXb/t0DDxu7+w0dAHhgocUXew2BGREREPu3G1ZidGYMZERER+SxbcTEsp08DAGQaDTSjRklckXcxmBEREZHPatpb5j82CbJO/hxlBjMiIiLySaIootZjGLPzrsZsxGBGREREPsl6/jxsRVcAAPKwUPjFDZa4Iu9jMCMiIiKfZGq6d9mEZAiyzh9bOv8dEhERUYcjOhyo3bvX/b6zr8ZsxGBGREREPqf+WB4cVVUAAFXvXlD16SNtQe2EwYyIiIh8jmnP9Un/ARMnSlhJ+5I8mGVlZSE2NhYxMTHIzMxsdvy5555DZGQkRt2wb0l6ejr69euHhIQEJCQk4Pz58wAAs9mMRx99FDExMZg8eTJKS0vb5T6IiIiobTjr61F34KDrjSBAO2GCtAW1I0mDmd1ux5IlS/DNN9/gyJEjeOONN1BWVuZxzvz585Gdnd3i5//7v/8bR48exdGjR9G/f38AQGZmJvr164ezZ8/isccew9/+9jev3wcRERG1nbqDByFaLAAAv7g4KMLDJa6o/UgazA4ePIi4uDgYDAZotVqkpKRg27ZtHueMHz8eYWFhrb7m5s2bsXDhQgDAggUL8MUXX7RpzURERORdHqsxJ3WdYUxA4mBWVFQEg8Hgfm8wGGA0Glv9+WXLlmHYsGF4+eWX4Wh4uGnTawYHB6OysrLFz2ZmZiIpKQlJSUkoKSm5+5sgIiKiNuOorER9Xh4AQFAqETBmjMQVtS/J55jdrddeew0nT57EgQMHcOHCBbzzzjt39PmMjAzk5uYiNzcXERERXqqSiIiI7oRp7z7A6QQA+I8aCVlAgMQVtS9Jg5ler/foITMajdDr9a36bI8ePSAIAvz8/LBo0SIcOnSo2TUrKysRHBzc5nUTERGRd9R20dWYjSQNZomJiTh+/DiMRiNMJhNycnIwderUVn32yhXXIxqcTic2b96MuLg4AEBaWhrWrVsHAHj//feRlpbmneKJiIioTVkLjbCcc+2yINNq4T98uMQVtT9Jg5lCocCKFSswefJkJCQkYOnSpQgLC0NqaiqKiooAuLbFGDt2LPLy8tCzZ09s3LgRAPDEE09g6NChGDp0KBwOB37xi18AAJ555hmcO3cO0dHR2LhxI1566SXJ7o+IiIhaz6O3bNw4CEqlhNVIQxBFUZS6CKklJSUhNzdX6jKIiIi6LFEUUfj/PQd7w4K8Hn95FX4DB0pclXfcKnd02Mn/RERE1HlYzpxxhzJFRATUsbESVyQNBjMiIiKSnGlXk2HM5AkQBEHCaqTDYEZERESSEm021O7b536v7YKrMRsxmBEREZGk6o8ehdNkAgCo+veDqmdPiSuSDoMZERERScq0+/owpja56/aWAQxmREREJCFnbS3qDh12vZHJEDBhvLQFSYzBjIiIiCRTe+AARJsNAKCJj4ciJETiiqTFYEZERESS8RjGnNS1hz
EBBjMiIiKSiL2sDObjPwIABLUa/omJElckPQYzIiIikoRpzx6g4QFE/omjIdNoJK5IegxmREREJIna3Xvcr7UTJ0lYie9gMCMiIqJ2Z83Ph/XSJQCAXBcEzbChElfkGxjMiIiIqN2ZmvSWBYwfD0Eul7Aa38FgRkRERO1KdDph2tt0GJOrMRsxmBEREVG7Mp84CUdZOQBAqe8BVXS0xBX5DgYzIiIialem3bvcrwOSkyEIgoTV+BYGMyIiImo3otWKuv3fut9zGNOTV4JZSUkJ8vLyvHFpIiIi6sDqvvsOzvp6AIA6NhbK7t0lrsi3tCqYyeVy/PnPf/Zo++ijj/Doo4+2eP4///lPDB8+/N6rIyIiok7FtKvJI5iSJ0hYiW9qVTATRRFiw868jU6dOoXPP//cK0URERFR5+MwmVB35HvXG7kcAePGSVuQD+IcMyIiImoXtfv3A3YHAMB/eALkOp3EFfkeBjMiIiJqF6bd14cxA5I56b8lkgezrKwsxMbGIiYmBpmZmc2OP/fcc4iMjMSoUaM82ufPn4/Y2FgMGTIEL7/8srt97dq1iIiIQEJCAhISEvDRRx95/R6IiIjo1mzFJbCcPAUAEDR+8B896jaf6JokDWZ2ux1LlizBN998gyNHjuCNN95AWVmZxznz589HdnZ2s88uWrQIp06dwpEjR7B//3588803HseOHj2Ko0ePYs6cOV6/DyIiIrq12iY7/QckjYVMrZawGt8laTA7ePAg4uLiYDAYoNVqkZKSgm3btnmcM378eISFhTX77LRp0yAIApRKJRISEmA0GturbCIiIroDoih6rsacmCxhNb5N0doT3377bWzYsMH9vrS0FAAwePDgZuc2HrudoqIiGAwG93uDwXDHAaumpgZbtmzBsmXL3G3r16/Htm3bMGTIELz55puIjIxs9rnMzEz30GlJSckd/UwiIiJqPevFi7A1/P0uDwmB35AhElfku1odzEpLS1sMXKdOnWrx/PZ4vIIoikhPT8ezzz6LqKgoAMCMGTMwb948qNVqvPXWW3j++eexcePGZp/NyMhARkYGACApKcnrtRIREXVVN+5dJsgkn+Lus1r1zTidzjv+5XA4bntdvV7v0UNmNBqh1+tbXfyLL76IkJAQLF261N0WFhYGdcO49TPPPINDhw61+npERETUtkSHA7V797rfB/ARTLckaWRNTEzE8ePHYTQaYTKZkJOTg6lTp7bqs++88w6OHDmCf/7znx7tV69edb/etGkT4uLi2rRmIiIiaj3zDz/AUVkJAFBGRUHVp4+k9fg6SYOZQqHAihUrMHnyZCQkJGDp0qUICwtDamoqioqKAADp6ekYO3Ys8vLy0LNnT/ew5PPPP4/8/HyMHj0aCQkJWLNmDQDgrbfewpAhQzBs2DC8++67ePvttyW7PyIioq6u6d5l2okT22WqU0cmiDc+a6kF9fX1uHLlCsLDwxEUFORxLD8/H7/+9a/d21UkJydj+fLlGDhwoHcq9oKkpCTk5uZKXQYREVGn4jSbcfnpDIhmMwAg6p1/QtGtm8RVSe9WuaNVPWb/+Mc/EBMTg5MnT3q0V1VVYeLEidi8eTNqampQU1OD7OxsTJo0CcXFxfdeOREREXVYdYcOu0OZ3+DBDGWt0Kpgtnv3bvTq1QtjxozxaH/77bdRWFiIiRMn4sKFCygpKcGvf/1rXLt2DW+++aZXCiYiIqKOweMRTNy7rFVaFcxOnDiB5OTmX+hnn30GQRCwevVq9OnTB+Hh4VixYgUGDBiAL7/8ss2LJSIioo7BUVmJ+qNHAQCCQoGAsWOlLaiDaFUwu3btGnr16uXRVl9fj2PHjiE+Ph59+/b1ODZ58mRcuHCh7aokIiKiDqV2/37A6QQAaEaNhFyrlbiijqFVG8za7XaYTCaPtmPHjsHhcCAxMbHZ+WFhYbBYLG1TIREREXUYosOB6pytqGzytCBtMvcua61WBbOoqCh8//33Hm179uyBIA
gtBrPy8nJ04wQ/IiKiLqX++I8oW5UJ2+UCd5s8PAz+I4ZLWFXH0qqhzClTpmDfvn348MMPAbg2cX3nnXcgk8mQmpra7PzvvvsOvXv3bttKiYiIyCfZy8pQ8l9v4urvf+8RytSxsej+yisQVCoJq+tYWhXMXn75ZQQFBWHhwoUICwtD7969cfHiRSxatKjZI5QKCwtx+PBhTJo0ySsFExERkW8QrVZUfvpvFP78F6jdt8/dLtfpEP7z59Hj1T9D1bOnhBV2PK0eyty5cyeWLFmC3NxcREZG4ic/+Qn+8pe/NDt3zZo1CAoKarEnjYiIiDqHuu+/R9nq1bBfuf4oRMhkCJqeipDZsyELCJCuuA6sVTv/d3bc+Z+IiKh1bFevonzNWtQdPuzR7hc/BGFPPw1VVJRElXUct8odreoxIyIioq7NabGg6t//RtXnmyHabO52eXgYwtLT4Z+UxOdgtoFWBbN+/frd8YUFQcD58+fv+HNERETeIjocqDtwADXf7IBMo0Hgg1PgFx/PQHELoiiiLjcX5Wvfhb201N0uKBTQzXwEulmzIPPzk7DCzqVVwSw/Px9yuRwKBTvYiIio43GYTKj56itU5+TAUVrmbq/dvx9KgwFBKdOgve8+yDQaCav0PdaCApStXg1z3g8e7f6jRiI0PR3KHj0kqqzzuqOkdd9992Hx4sWYOXMmlEqlt2oiIiJqE9bCQlRvyYZp1y6IN9n43GY0oixzFco/+ACB992HwGkpUPU0tHOlvsVZV4eKjz9GdXYO4HC42xU9uiPsqafgP3KkhNV1bq0KZidOnEBmZiY++OADzJ07F6GhoViwYAEWL16M+Ph4b9dIRETUaqLTifqjx1CdnY36I0eaHVfq9QhKTYGzrg7VX34JR1m563P1ZlTnbEV1zlZohg1FUEoKNCNHQpC1amepTkF0OmHavRsV762Do6rK3S6o1Qh+7DHoZqRxTzIvu6NVmQ6HA1988QVWr16NrVu3wuFwYPjw4Xj66acxf/586HQ6b9bqNVyVSUTU8TnNZph27kJ1djZsRmOz45phwxCUlgZNwjB32BLtdtQdPozq7ByYf/yx2WcU3bohcNpUBD7wAOSBgV6/BylZLlxEWWYmLKdPe7QHjB+P0EULoQgPl6iyzudWueOut8soLi7G2rVrsXbtWpw+fRoajQazZs3CX//612YPPPd1DGZERB2X/do1VOdsRc1XX8FZW+txTFCpoJ00CUHTU2+7jYP10iVU52yFaffuZsOeglKJgOQJCEpJgfouFsT5MkdNDSo+XI+a7duBJpFAGRWFsKefhiZ+iITVdU5eCWZNff3110hPT0dRURE+++wzPPzww/d6yXbFYEZE1LGIogjL6dOozspC7YGDgNPpcVweFoqglFQEPjgFcq32jq7tMNXCtGMHqr/c6rl5agN1bCyCUlIQkDQGQgeeby06najZ/hUqPvwQTpPJ3S7TaBA8dy6Cpk2FwEV/XuG1fcwOHTqE1atXY8OGDaiqqoLBYEBPPnqBiIi8RLTZULt/P6qzs2E513xLJnVsLIKmpyJgzJi7DhVybQB0M9IQND3VNVdtaw7qvz/i7k2ynD6Na6dPo1ynQ+BDDyHwoQehCA29p/tqb+bTp1G2MhPWixc92rX3T0boE09AHhwsTWF058GstLQU69atw5o1a/Djjz9CoVBgxowZePrppzF16lTIutAkSSIiah+OykpUb9+Omq1fwlFZ6XlQLkfAuLHQTZ8OdUxMm/1MQSaD/4jh8B8xHLYrV1D95ZcwfbPDPVzqqKpC5caNqPz3vxEwZgyCUqZBPWiQT++JZq+oQMW692HatcujXdW/H8KezoBf7ACJKqNGrRrKdDqdyM7OxurVq7FlyxbYbDYMGTIEixcvxoIFCxDewScEciiTiMg3WS5eRHV2Nmr37PXYbR4AZEGBCHzwQQRNm9ZuPVZOsxmmPXtQk5MD66XLzY6r+vRBUMo0BCQnQ6ZWt0tNrS
Ha7ajOyUHFRx9BrDe722WBgQh5Yj4CH3igS60+ldo9zzHT6/UoLi6GTqfD3LlzsXjxYowaNarNC5UKgxkR+QJnfT1qtm2DtaAQSoMB6uhoqPv3g8zfX+rS2pXodLpWSm7Jhvn48WbHlb2ioEtLc4UfibZuEEURlpMnUZ2zFbW5uc3muMkCAhD4wP0InDYNyshISWpsVJ+Xh7JVq2ErLLzeKJMhaOpDCJ47947n4NG9u+dgJpPJoFQqMW7cOGhauSuyIAjYsmXLbc/LysrC0qVL4XQ68eKLLyIjI8Pj+HPPPYdPPvkEUVFRONzkgannz5/HnDlzUFlZiSlTpuCf//wnBEFAaWkpZs+ejcLCQsTHx+PDDz+E320eFcFgRkRSEp1OmHbsQMX6DXBUVHgeFAQoe/Z0hbToaKhjoqHq1atDTzq/GWddHWq+2YHq7GzYi4s9DwoC/EeORFDadPgNGeJTw4X2sjLUbN+Omm3bPfb+AgAIAjQjhiNoWorHNh3tUte1ayh/913Ufuv595t60ECEPf001H37tlst5KlNgtmdEgQBjia7BbfEbrdj8ODB2LFjB3Q6HUaOHIn9+/cjLCzMfc6+ffvg5+eHn/3sZx7B7PHHH0d6ejrS0tI8Xi9btgx9+vTB888/7/H6VhjMiEgqdUeOoPy992C7XNDqzwhKJVR9+kAd0xjWYqDo3r3DDkXZrl5FdXYOar752mOYDQAEjR8CJ9+PoNQUn3/8j2i1ovbAAVTnbG22FxgAKPU9EDh1GgIn3wdZQIBX66javBmVn/4botXqbpeHhCB00UIEJCf7VLDtiu55VebFG1ZttJWDBw8iLi4OBoPr0RcpKSnYtm0b5s2b5z5n/PjxyM/P9/icKIrYv38/Nm7cCABYsGABvvjiC6SlpWHz5s04dOiQu/3FF1+8bTAjImpv1kuXUP7eOtQfPerRroyKQtD0VNivXYPl7FlYz52Hs67O4xzRZoPl7FlYzp51t8n8/aGK7g91TMz1sBYS0h63cldEUYT5+HFUZ21B3XffeeyfBQCKiAgEpaYi8P7JXg0xbUlQqaBNToY2ORmW8+dRvXWrx9w4W9EVlK9Zg4r166GdOBFBKdOgauN9P+sOH0bZ6jWePY5yOXRpaQie/TifBdoBtCqY9e7d2ys/vKioyB3KAMBgMMDYwm7NNyorK0NoaKg78Tf9XFVVlfsJBLe6XmZmJjIzMwEAJSUl93QfREStZS8vR8WGDTDt2OkxL0mu0yF47hzXJGy53N0uOp2wX7kCy7lzrl9nz8F68SJEu93jus66OpjzfvB42LQ8LBTq6Bj3EKi6f3/J56s5rVbU7tmD6i1bWpw87xcXh6C06fAfNarD9gACgLp/f3R77jmELlyImq+/Qc3WrbCXlgIARLMZNdu2oWbbNtf9pqbAf/Roj//d75StqAhla9ai/vvvPdo1w4YidPHTXf7Znx1Jl905LiMjwz2fLSkpSeJqiKizc5rNqPp8M6o+/9xjV3lBpYLukYehe+SRFnszBJkMSoMBSoMB2kmTALh6zKyXLrmDmuX8edfE7ht6nRxl5agrO4C6AwcaLiZcX1TQ0Lum6tWrXZ59aC8vR/XWrajZvh3O6hrPe1QoEDAxGUGpqZ1u3pM8KAjBs2ZC98jDrgUNOTke4dn8448w//ija0PcqQ2PfrqDPcScZjMqP/0U1Zu/8Ajrim7dEJqeDv8xiRy27GAkDWZ6vd6jR8toNCIxMfG2nwsLC0N5eTlEUYQgCDAajdDr9QAAnU7n7jVr2k5EJIWbTuwXBGjvuw8h8+ZC0WRebWsISqV7MQCmudqcdXWwnL/Q0LPmGuZsfDj39WJE2AoLYSsshGnnTte1FIrr89UahkEVPXq0WW+V5dw5VG3Zgtp9+4Eb5h3Lg4MROG0qgh58sNNvaCrIZAhITERAYiKshYWo2boVNTt3uufUOcrKUfHhelR+vBEB48e5Hv10iz3ZRFFE7b79KH/vXY//nQ
WlErpZs6CbNVOyFat0byQNZomJiTh+/DiMRiN0Oh1ycnLwyiuv3PZzgiAgKSkJW7ZsQVpaGj744AMsWrQIAJCWloZ169bh+eefx/vvv48ZM2Z4+zaIiFpUf/Qoyt97r9mQnd/QeIQuWtSmvUMyf39o4od4PNfQXlEB67lzMJ89C+u5c7CcO9/sWZKi3e4eJkXOVte1NBqoGhcWRMdAHRN9R/uEiQ4HanNzUb0lu8VJ8Kr+/aCbPh0BY8e2S2+dr1H17ImwjAyEzJ8P065dqM7Oga2oCIDrfw/Trt0w7doNdXR/BKakQDtunMf3ZL18GWWZq5o9dN0/MRGh6elQRka06/1Q22qTZ2Xei82bN2PZsmVwOp144YUX8NOf/hSpqanIzMyEXq9Heno6vvzyS5SVlSEiIgJvvvkmZs+ejbNnz2Lu3LmorKzEAw88gHfeeQcymQzXrl3D448/DqPRiCFDhmD9+vW33eKDqzKJqC1ZL19G+bvvtTixP3TRQmiGD5dkeEl0OmG/evX6EOi5hvlqN2zc2hJ5aKjHlh3q/v2bTcp3mEyo+eorVOfkwFFa5nkBmQwBYxIRlJYGdWwsh9eaEEUR5h9+QHV2jmshxI17ogUFInDKFGiTk1Hz1deozsnxOEep74HQxYvhP3x4e5dOd8nrDzHv6BjMiKgt2CsqULlhA2q+2dGqif2+QLTZYL18uWGumiuwtTRfrSWu+Wr9oY6OhrXANTzadHsGoGGj1SlTEDhtKpQR7Mm5HVtxiWthwFdfeTxYvCWCnx+CZz8O3fTpnXJfu86Mwew2GMyI6F64J/Zv3gzRfH0fLkGlgu7hGdDNnNmhtilw1tc3zFc7696yo3FFYWsp9XoETU+FdtKkDnXvvsJptaJ2715UZ+c0e9A4AAQkT0DowoV3PD+RfMM972NGRETNiU4nTDt3ouLD9c0n9k+ahJD58zrkX5wyjQaaIXHQDIlztzXOV2s6DHrjfDUA0CQkIGj69Hbf5b6zkalUCLz/fmgnT4blzBlU5+SgLvcAlL2iEPrkk9DExd3+ItQhMZgREd2F+mPHUP7ue7BeuuTR7hc/BKGLnoS6X+fa9kEREgLF6NHwHz0agGtelP3KFVjOn4fl3HkIKiW0EydCFRUlcaWdiyAI8IuNhV9sLESnk2G3C2AwIyK6A9bLl1079h854tGu7NnTNbF/xIguMbFdEAQo9Xoo9Xpok5OlLqdLYCjrGhjMiIhawV5RgcqPPkLN1990mIn9RNTxMJgREd2C02xG1ebNqPr8hon9SiWCHp6B4JkzJX/MERF1HgxmREQtcE/sX78BjvImO+gLArQTJyLkifkdcmI/Efk2BjMiohvU5+W5Jvbn53u0+w0ZgtAnF0Hdr580hRFRp8dgRkTUwFpQ4Nqxv4tP7Cci6TCYEVGX55rY/zFqvv76hon9QQieMxeBUzixn4jaB4MZEXVZTovFNbF/0+ec2E9EPoHBjIhape7771G7bz8gEyAPDoY8SAd5sA5y3fVfsqCgDrHXkmti/y5UrF/vObEfuL5jf3i4RNURUVfGYEZEt2QrLkb56jWoO3z49icLAuRBgZA1hrYgnSvE6YKuhzedDnJdMOTBOsjUau/fwA1uOrE/Lg6h6U9yYj8RSYrBjIha5LRaUfXZJlR99hlEm611HxJFOKqq4aiqhq2g4LanC35+kAcFNfTABbnCmrsHLtj134ZeOZlWe0+9cTed2G8wuCb2jxzJif1EJDkGMyJqpu7QIZStXgN7SYm7TVAoEPTIw1D36w9HdRUclZVwVlfDUVUFR2WV679VVXCaTK3+OaLZDLvZ7PFzbkomgzwwEPLgYMjcPXCe4a1xOFUeHAyZSgXgNhP7fzIHgQ9O4cR+IvIZDGZE5OYatlyNusPfebRrRoxA2OKnoOzR47bXEG02OGpqGsJaQ3irrGzoSatq8qsSjqoqwO5oXXFOp/uzrSFo/CDXBcNRWcmJ/UTUYTCYEdFNhy0VER
EIW/wUNKNGtXqYT1AqoQgNhSI09LbniqIIZ20dnA09cI5mIc4V3pwNvXLOurpW35NYb4a9/qpHm3bSRITMmwdFt26tvg4RUXtiMCPqwkRRRP3hw82HLZVK6GbOhO7RWe4hQW8QBAFybQDk2gAo9frb12u1NvTGNYS3yko4qhuC2w1Dqo6qKvfQpV9cnGvH/v79vXYvRERtgcGMqIuyXb2KslWrUf/99x7tmpEjELZ4MZTdu0tU2c0JKhUUYWGtekal6HS6etgcDsh1unaojojo3jGYEXUxTosFVZ995tpUtYVhS//RoyWsru0IMhnkWq3UZRAR3REGM6IuQhRF1B06hPI1a5sPW86aBd2smV4dtiQiottjMCPqAmxXrriGLW/Yw8t/1CiELn4KyshIiSojIqKmJH92SlZWFmJjYxETE4PMzMxmxw8ePIi4uDhER0fjT3/6k7s9OTkZCQkJSEhIQLdu3fCrX/0KALB27VpERES4j3300UftdStEPsdpsaBi/XoYf/Vrj1CmiIhA5MsvIfLllxjKiIh8iKQ9Zna7HUuWLMGOHTug0+kwcuRIzJo1C2FNJvY+99xzWL9+PeLi4jB+/HjMmjUL8fHx2LNnj/ucCRMmYObMme73ixYtwvLly9vzVoh8iiiKqDt4COVr1sB+7Zq7ncOWRES+TdJg1tgbZjAYAAApKSnYtm0b5s2bBwAoKiqC3W7H0KFDAQBz585FVlYW4uPj3dcwGo24ePEiJk6c2P43QOSDbEVFKFu95ibDlouhjIyQqDIiIrodSYNZUVGRO5QBgMFggNFovOXxXbt2eVxj48aNeOyxxyBr8gy99evXY9u2bRgyZAjefPNNRLYwVJOZmekeOi1pzeNgiHyc02JB5aefovrzzRDtdne7IjISYU8vhv/IkRJWR0RErSH5HLN79fHHH2POnDnu9zNmzMCFCxeQl5eHxMREPP/88y1+LiMjA7m5ucjNzUVEBHsQqOMSRRG1uQdg/MUvUfXpv92hTFAqETx3Dnq+9SZDGRFRByFpj5ler/foITMajUhMTLzlcX2T3cEvX76MwsJCjBs3zt3WdH7aM888g7feestL1RNJz1ZU5FptefSoR7v/6NEIfeopDlsSEXUwkgazxMREHD9+HEajETqdDjk5OXjllVfcx/V6PeRyOfLy8hAXF4cNGzZg5cqV7uMbN27E7NmzPZ7hd/XqVXRv2LF806ZNiIuLa78bImonTrMZlf/+d/Nhy+7dXcOWI0ZIWB0REd0tSYOZQqHAihUrMHnyZDidTrzwwgsICwtDamoqMjMzodfr8fbbb2PevHkwm81YuHChx8T/jz/+GP/93//tcc233noLWVlZkMvliIyMxP/93/+1920ReY0oiqg7cABla9bAUVrmbhdUKgQ/9ih0Dz8MgastiYg6LEEURVHqIqSWlJSE3NxcqcsguiVbURHKMleh/tgxj3b/xESEpqdz2JKIqIO4Ve7gzv9EPs5pNqPy009RtXkzYHe42xU9uiNs8dPwHzFcwuqIiKgtMZgR+ahbDls+/hh0M2Zw2JKIqJNhMCPyQTajEWWrVqH+WJ5Hu3/SGIQ++SSU3OKFiKhTYjAj8iHO+npUfvIpqrK+8Bi2VOp7IHTxYvgP57AlEVFnxmBG5ANEUUTdt9+ibO1aOMrK3e2uYcvHoZuRxmFLIqIugMGMSGLWQiPKVmXCnPeDR7t/0hiEpadD0a2bRJUREVF7YzAjkoizvh6VGz9B1ZasZsOWYU8/DU1CgnTFERGRJBjMiNqZs74eph07ULlpk+ewpVrtGrZMm85hSyKiLorBjKid2IpLUJ2TDdNXX8NZX+9xLGBsEkKffJLDlkREXRyDGZEXiaIIy6lTqMrKQt3BQ4DT6XFcqdcjLONpaIYNk6hCIiLyJQxmRF4g2myo3b8fVVu2wHr+QrPjfoMHIWj6dPiPHg1BLpegQiIi8kUMZkRtyFFVhZrt21GdsxWOykrPgwo5tO
PHI2j6dKj795ekPiIi8m0MZkRtwHrpEqq2bEHt7j0QbTaPY7KgQARNnYrAqVOhCAmRqEIiIuoIGMyI7pLodKL+yBFUZWU124MMAJS9oqBLS0NAcjJkXGVJREStwGBGdIec9fUw7dyJ6uxs2IquNDvuP2okgqZPh198PARBkKBCIiLqqBjMiFrJVlKCmq1bUbP9Kzjr6jyOCX5+CJx8H4JSU6HU66UpkIiIOjwGM3ITRRH1hw+7Nj6trII6Ohp+gwbBL24wlAYDBJlM6hLbnSiKsJw+jeqsLNQeONhsuwtFeDiCpqdCe/8DkGsDJKqSiIg6CwYzAgBYzp1D+bvvwXzihLvNfvUqavfuBQDItFr4DRwIv7jB8Bs0CKq+fSEoOu9vH9FmQ21uLqqzsmA5d77ZcfXAWOjS0uCfmMjtLoiIqM103r9ZqVVsxSWoWP8havfsveV5TpMJdYcPo+7wYQCuoTv1gBhXj9rgOKgHxHSKCe6O6mrXdhdbv4SjvNzzoFyOgPHjoJs+HeroaGkKJCKiTo3BrItymEyo+ve/Ub0lG6Ld7m4XNH4InjkTAePGwXL+PMw/noD51CnYCgo8Pi+azTDn/XB9NaJCDnX/hqHPwYOgjh3YoYb2rAUFqN6yBaadu5pvdxEYiMCHHkTQtGlQhIZKVCEREXUFDGZdjGi1ovrLbaj85BM4TabrB2QyBD74IEJ+Mhvy4GAArscFaZOTAbh6kswnT8F88gQsJ0/CcuGi53wruwOW06dhOX0aVZs2AYIAVe/e8Bs8CH6DBkE9aJDP7eElOp2oP3oU1VlZqD+W1+y4MioKurTpCJg4sVP0BhIRke9jMOsiRFFE7f79qHj/A9hLSjyO+Y8ejZAFC6Dqabjp5+VBQQgYk4iAMYkAXFtGWM6cgfnECZhPnITl7FnPniZRhDU/H9b8fFRn5wAAlPoeUA8cCL/Bg+E3eDAUERGSbCfhNJtd211s2dLidheaESOgS5sOv6FDud0FERG1K8mDWVZWFpYuXQqn04kXX3wRGRkZHscPHjyIp556ChaLBYsWLcLvfvc7AEB6ejp2796NoKAgAMCnn36K/v37w2w2Y/78+fjhhx/Qs2dPbNy4EeHh4e1+X77EfPIkyt99D5azZz3a1dH9EbJoETRxcXd8TZlGA82wYe6Hb4tWKywXLsB84iTMJ07AcuoUnPX1Hp+xFV2BregKTN/sAADIw0LhN9A19Ok3eDCUPXt6deWn/do1VOdsRc1XX8FZW+txTFCroZ18H3SpqVAabh5QiYiIvEkQRVGU6ofb7XYMHjwYO3bsgE6nw8iRI7F//36EhYW5zxk9ejRWrVqFuLg4jB8/HitXrkR8fDzS09Px+OOPIy0tzeOab7/9NvLz87F8+XKP17eSlJSE3Nxcr9yjlGxFRShf9z7qDh70aFdERCDkiScQMG6s14KQ6HTCeumSK6SdPAnziZNwVFXd8jPulZ+DXUOf6n797nnlpyiKsJw5g+qsLajNzW223YU8PAxBKSkInDIFcq32nn4WERFRa9wqd0jaY3bw4EHExcXB0NBDkZKSgm3btmHevHkAgKKiItjtdgwdOhQAMHfuXGRlZSE+Pv6m19y8eTPeeOMNAMCCBQswZsyY2wazzsZRWYmKjRtRs227RxCRabUIfvwxBE2dCsHLc6YEmQzqvn2h7tsXmD4doijCfuWKe+jTfPJksyHVZis/1WqoYwe4FhQMGuxa+alWt+rni3Y7ar9t3O7iXLPj6thY6NKmw3/MGG53QUREPkPSYFZUVOQOZQBgMBhgNBpveXzXrl3u98uWLcNvfvMbpKam4tVXX4VcLvf4THBwMCorK1v82ZmZmcjMzAQAlNwQEDoqp8WC6qwsVH72GcR6s7tdUCgQmDINwY8/LlmvkCAIUOr1UOr1CJwyBQBgLytzD32aT55svvLTYml55WfDfmotrfx01NSgZtt2VH+5FY6yFra7GDsWurTpUMfEeO1eiYiI7p
bkc8zu1muvvYbu3bvDYrHgySefxDvvvIPnnnuu1Z/PyMhwz2dLSkryVpntQnQ6Ydq5CxUb1jcLIwHjxyPkiSegjIyQqLqbU4SFQZs8AdrkCQBcocp88mTD0OeJW6/8/Pxz18rPXr1cQ58DBsB84iRMu3ZBtFo9fo5Mq72+3UWTYXIiIiJfI2kw0+v1Hj1kRqMRiYmJtzyub3gOYY8ePQAAfn5+WLRoETZu3OjxmfDwcFRWViK4YeuHzqr+2DGUv7cO1vx8j3a/wYMQumhRh+oZkgcGIiAxEQGJTVZ+nj17feXnmTPNV35eugTrpUtAztZm11P27Ol6XNKkSa0eAiUiIpKSpMEsMTERx48fh9FohE6nQ05ODl555RX3cb1eD7lcjry8PMTFxWHDhg1YuXIlAODKlSvo0aMHnE4nNm/ejLiGlYVpaWlYt24dhg0bhvfff7/Z4oD25nSKkMnafssFa34+yte9j/qjRz3alXo9QhYugP/o0R1+qweZRgPN0KHQNMwxFK1WWC5e9Fz5ecPDxAFAM3y4a7uLYcM6/HdARERdi6TBTKFQYMWKFZg8eTKcTideeOEFhIWFITU1FZmZmdDr9Xj77bcxb948mM1mLFy40D3x/4knnkBpaSmcTieSkpLwi1/8AgDwzDPPYN68eYiOjobBYMAnn3wi2f3VWx14cvVB9A7zR5/wAPTvFoC+4Vr0DvOHn/LuJpzby8pQsWEDTDt2Ak0W1Mp1OgT/5CcInPJAp32GpaBSwS82Fn6xscCsme6Vn5aTJ2E5dw6yoCAETpkCVc+eUpdKRER0VyTdLsNXeGu7jJNXqvHCJ813lBcEQK/ToG+3APRtCGx9wgIQGqC6aQ+Ps74elZ99huovsjzmUAkqFXQPz4Bu5kzINJo2vwciIiJqWz67XUZnV1xthkwAnDdEX1EEjJX1MFbWY+/ZUne7TqNEn3B/9AvXom+3APQLD4Beq0T9jh2o/Phjz33ABAHayfchZO5cTmgnIiLqJBjMvOi+2AiM6x+Oy+V1uFhai4ulJlwsrcWFa7WoszqanV9Vb8OxgiocK6gCRBH9is5gwg/fILKuAiqFHGqlDGqFDIEjhiMyfRFUffq0/00RERGR1zCYeZlKIUN0hBbREVoAkQBcu9GX1FjcIS2/rBYXrplQXG0BAESUFyH52FcwXLsMALAAsNiduOAXib3xD6AgrB8id11D3x/r0Ddci77hAejXLQARgWpOdiciIurAGMwkIAgCIoP8EBnkh6R+14chqwqKULj6PVgPfAur3QmLUgar3Ykav0B8G38fTvWOhyi4HqFUXG1BcbUFuReu71vmr5KjX8N8tX7dXIGtV6g/VArvPX+SiIiI2g6DmQ9wmEyo/OQT1ORshb/dDn+NEgAgaPwQ9MhM1Ix/AME1Ngxq6GG7WFqLqnpbs+vUWR04bqzGcWO1u00mAD1D/dEvPKChZ02LvmEB0Pkr2+3+iIiIqHUYzCQkWq2o3roVlZ/+G06T6foBmQxBUx9yPUIpOBihAHoDQGzD50QRFXU2XCw1uYPaxdJaGCvrceMaW6cIXC6rw+WyOuw8fc3dHhqgcg+B9m0IbXqdxit7rhEREVHrMJhJQBRF1O7bj4oPPmj2IG//xESELngCyibPCL2RIAgIDVAhNCAUI3uHutvNNgculdW5AltpLS42zF8z25zNrlFea0V5rRXfXapwt6kVMvQOc4W1yCA/yGWATBAgCAJkguu1TEDD++ttgvtY0+OATHaH5ze0CU0+29L5MpkAAYBcJkCAAJkMECBAKRc4x46IiDo0BrN2Vv/jj6h47z1Yzp33aFfHxCB00UL4DR5819f2U8oR2z0Qsd0D3W1Op4gr1Wbkl7oWGFwsrcOFUhPKTNZmn7fYnThTXIMzxTV3XYOUgv2VGNYzGAlRwUjoFYxwLR/DREREHQuDWTuxFhpRsW4d6g4f9mhXREYi5In5CBg3ziu9PTKZAEOwBoZgDcZHh7vbq8
02XGwYBr3QMBR6ubwOzhs3XetAKuts2HXmGnadcQ3ZGoI1SOjlCmrxBh0C1PztTkREvo1/U3mZo7ISFR9vRM327YDz+pCiTKtF8OOPI2jqQxBUqnavK8hPiWFRwRgWFexus9qdKKyow4VrrsUFIlw9biJEOJyAUxQhiiKcouu1U0TD++ttouj6zPX3rTi/SZsoinA6Wz7fIYqACDia1ASIcDhF2J0iKus8F0Q0buK7Je8KZAIwIDIQw6JcQS22eyCUcq5WJSIi38Jg5kU133yDstWrIdab3W2CQoGg6anQPfoo5FqthNU1p1LI0K+bFv26+VZdrVVqsuDo5UocK6zE0YJKj6DmFIFTV2tw6moNPjpUAD+lDEMMOiREBWN4VAiiQjWcn0ZERHA4RdRZ7Qj0k2b3AgYzL5LrdB6hLCB5AkLmzYcyMkLCqjqvcK0aUwZHYsrgSIiiiEtldTha4Appx41VsNiv91iabU4czq/A4fwKABcR7K/E8Ia5aUN7cn4aEVFX4XSKuFxeh2OFlcgrrMIPxiok9Q3FkodiJamHwcyLNCNGwG/IEEAUEbpoIdTR0VKX1GUIgoA+4QHoEx6AmcMNsDYsbDhSUImjlytxrqTG4xmmlXU27Dh9DTsathTpFeqPYVE6DOvpCmoalVyiOyEiorYkiiKuVJmRV1iJY4VV+KGwqtneoMcKqyCKoiQjKQxmXiQIAiJffAGChsNkUlMpXEOXQww6LEzqDZPF7vo/ZUEVjhZUoKjS7HH+5fI6XC6vwxfHrkAmExAbqUVCVAgSooIxIFILRReYn2axO1BSbUFJjRkl1RYUV5tRbbZjQKQWyTHduJiCiDqMUpMFPxRWuXvFrtVYbnquIAAh/kqYLNIMZwqieOOWpF1PUlIScnNzpS6DJFRSbcaxwiocuVyBvBb+9dSURilHfE+dayFBz+AOOz+tpeBVUuN61FdJjbnZYoqmVAoZxkeH46HBkYjTB3XI+yeizqvabLsexAqqYKysv+X5UaEaDO0ZjKEGHeJ76rweyG6VOxjMwGBGnpxOEflltThaUIljBZU4XlQNq735Jr2NQgNUGBYVjOENq1xDA9p/lW1LLHYHrjUErWs15obnqzaGr1sHrzvRQ+eHKYMicf+gCM7NawNFlfUoqqyHn1IOP6UcGpUc/g3/VStkDMFELai3OvBjURWOFrh6xPLLaps9CaepiEC1K4hF6TDUoENYO//ZxWB2GwxmdCtWuxOnrla7FhJcrsS5a6Zb/h++V6g/hvdyhbQhep3X5qdZ7U6UNAQubwSvYH8lIgL9EBmkRkSgGpFBflDIZdh95hqOFVY2+w5kAjC8VwgeGhyJ0X1DuR3JHTBW1mPf2VLsPVeKi6W1Nz1PJuB6WFPJoVEqoFHJ4K9SwE/pavNXyd2vNUrXL3+1wv36+mflfAQbdViNfy4fK6xCXkElzhR7zhu+UbC/EvEG10jHsJ7BiAxSS/qPHAaz22AwoztR09BFfqRhxefVKvNNz5XJBAzqHoiEht60AZGBkLfyL0Or3Ylrpoaw1RC4rla1bfDq1hC4Ihv+GxGkRkSgH7oFquGnvHmgLKk246uTJfj6ZDFKWpirodMocV9sNzw0uDt6hfnfU52dVWFFHfadK8Xec2XIv0UY8ya1QgZNQ0jzV8kbXl8Pe02DnF9DsAtQX3/d9ByVgkGcvMfhFHG2pAZ5BVXIM1biRFE1bI6bxxd/lRzxBh2G+uiUEwaz22Awo3tRXG12b8txrKASNWb7Tc/VqOQYatAhoZfrX20ymeARvFyvLSiusaCitvljs+5E0+DV2OMV2crg1VpOp4g8YxW2n7iKb8+XtfgH5YDIQDw4OAITB3SDv6prLxgoKG8MY6W4VFbX4jlBGgXiDcEQRRF1VgfqbQ7UWx2os9rdr33xAR0alRxRIf7oFeqPniEa9ArzR1SIPyIC1eyZozvmdIq4VF7nXqR13FiFepvjpuerFDLE6YMwtGcwhvXUoX
83rU//vmMwuw0GM2orTqeIC6W1ONYQ1H4sqrrlv+ruhU6jRESgGhENgcsbwetO1Jhdj8TafqIYF6417wHqqgsGWhvGxvUPx/jocMQbdLfsVRVFEVaHsyGsNQ1uja8bA5zTFeYa2uusDo/XZpsr7Hnr92cjlUKGniGa66Et1PW6h86vS6xuptYRRRFFVWb8UFiJowVV+MFYier6m/8jVyYTMLDhaS5De+owIDKwQ/XaMpjdBoMZeYvF7sDJKzU4erkCxwqrcP4289OaujF4XZ/v5RpybO/gdSfOXzNh+4li7Dp9DSZL8z9ce+j8MGVwJO4f2DkXDBSU12FvQxi7fJMwptMoMbZ/WKvCmDfZHU53uGsMbdeD2/Ww1/ja3HC87obXVXXWO+rJkzc8x7cxqPUK9UdUqD8MwZoO9Rcs3b1Sk8XdI5ZXWIlS081HCQQB6N9Ni6E9dRjaMxhx+iCf/jPwdhjMboPBjNpLtdmGvALXEu4TRdVQK2ToFqRGZEPY6h7k1yGCV2tZ7U7kXijD9hPFN10wMKJ3CB4c1PEXDFwuc4WxfedKcbm85TAW7K9EUr8wTIgOxxAJw5g3WO1OFFXWo6CiDgXlrv9eLq+DsaIejjtIbDIBiAzyQ1SoP6KaDIn2DPHnRs8dXFV9ky0sCiub7R95o16h/q6tiXoGY4ghSLJHJHmDTwezrKwsLF26FE6nEy+++CIyMjI8jh88eBBPPfUULBYLFi1ahN/97ncAgPnz5+O7776DUqnEjBkz8NprrwEA1q5dixdeeAF6vR4A8PLLL2POnDm3rIHBjMj7OuOCgTsJY8kx4Rii1/n0vBdvcDhFXK0243JZHQoq6lDYsHlzYUW9x2PSWiNcq3L3rPV097JpOtVf2J2BwymixmxDZZ0NxdVm/GCsQl5h1S1XHANAZFDDFhYNYSzER7Ye8gafDWZ2ux2DBw/Gjh07oNPpMHLkSOzfvx9hYWHuc0aPHo1Vq1YhLi4O48ePx8qVKxEfH4+tW7di6tSpsNvtmDJlCn7/+9/j/vvvx9q1a3H8+HEsX7681XUwmBG1n8YFA9t+vIrcC7daMBCJiQPCfW7BQGvD2Nj+DT1jXTCMtYbTKeKayYLChp61y2X17td11ptP8m5JsL+yoYfNFdQah0aD/ZVdZi6jN4miCJPFjmqzHZV1VlTV2VBVb0NlfcN/62yoqre6X5ss9lZN2Qj2VzY89s61jUVkkJ/3b8ZH3Cp3SPon3sGDBxEXFweDwQAASElJwbZt2zBv3jwAQFFREex2O4YOHQoAmDt3LrKyshAfH49p06YBAJRKJRISEmA0GqW5CSK6IzKZgISoYCREBbsXDGz7sdjjX9NnimtwprgGmXsuYHx0OB6UeMHA5bI67Dl3DfvOlaKgvOUdxIP9lRjXPxwTosMRpw9iGLsNmUxoWLDih5G9Q93toiiivNaKgop6FJTXNQyNuoZHb/ZEjso6GyrrXM88bEqrVrhWiDb0skWFahAV6o9uWmn3sPIFZpsDVfU2VDcErMrGsFVnbdZWVW+7o+HomwlQy917iQ01+N4WFr5C0mBWVFTkDmUAYDAYPAJWS8d37drlcY2amhps2bIFy5Ytc7etX78e27Ztw5AhQ/Dmm28iMjKy2c/OzMxEZmYmAKCkpKTN7omIWi/QT4m0oXqkDdW3uGDAYnfim1Ml+OZUiXvBwAMDI7y+S7coirhcfr1n7HZhLDkmHIN7MIy1BUEQEKZVI0yrRkJUsMexqnobCsrrUNgwj+1yQ3Aru8mkcZPFjlNXa3Dqao1Hu59ShqgQf4QEqKCUy6CSC1DIZVApZFDIBKgUMijljb8EqOQyKJq8ViqaH1MpZFDKBFe7wnVMKZO12+8Ju8OJarPdHaQq66xNXjfv2TLb7mwY+U4E+ikQ7K+ETqOETqNCTIQWw6J06Bfu21tY+ArfGiO4Q6IoIj09Hc8++yyioqIAADNmzMC8efOgVqvx1ltv4fnnn8fGjRubfT
YjI8M9ny0pKald6yai5vp306L/JC0Wj++L3Atl2HbiKo4VXO8BuVJlxrpvL+GD3EuuBQODIzG6T9stGBBFEZeaDFMWVtw8jI2PdvWMMYy1L51GCZ1BhyEGnUd7ndWOwop69zy2xtBWUmNucUjNbHPibImpXWqWyxrDm9AQ/GRQKQSP4Hf9tSskNoY7hUyAWtEYCl3n1locDcOIDT1bddeHD71Fo5QjSKN0h63ghteuNpW7TadxtXWmRS1SkDSY6fV6jx4yo9GIxMTEWx5vnNQPAC+++CJCQkKwdOlSd1vT+WnPPPMM3nrrLS9VT0TeoFLIMHFAN0wc0A3F1WZ8fbIEX50sxrWGBQNOETicX4HD+RXQaZSYPDACDw6KvKsFA41hbM+5Uuw7W3rTBx2HBKgwrmHOGMOY7/FXKTAgMhADIgM92i12B4wVjT1r9Shs6GEzVprhbKddeh1OEfVOB9A2j6ZtE3KZ4BGydBoldP4q9+vrvV2uoNUZVoh3JJIGs8TERBw/fhxGoxE6nQ45OTl45ZVX3Mf1ej3kcjny8vIQFxeHDRs2YOXKlQCAd955B0eOHEF2drbHNa9evYru3bsDADZt2oS4uLj2uyEialORQX6YP6YX5o6OwrHCSmw/UYxvL5TB3rBgoKrehk1HjNh0xIjY7oGYMuj2CwZEUUR+Y8/YbcLY+IZ9xhjGOia1Qo5+3bTo103r0W5zOHG1ygyTxQ6bwwmbwwmrXYTN4YTd6YTV7oTNIV4/5hBhd7ja7U6x4XjjL9eGv7ZbHLM3vPYWQWgYPtSomvVs6dzB63rvVoBKzrldPkzSYKZQKLBixQpMnjwZTqcTL7zwAsLCwpCamorMzEzo9Xq8/fbbmDdvHsxmMxYuXIj4+HgAwPPPP4++ffti9OjRAIBf/vKXeOqpp/DWW28hKysLcrkckZGR+L//+z8pb5GI2oBMJmB4rxAM7xVy0wUDp6/W4PTVlhcMiKKIi6W17h34b7Z/Umhjz1hMOAZ1ZxjrrJRyGaJC23dLFlEUYXOILQY/d4izNwZDz2NWh9gQ/JzwU8o9hg+D/ZUI9OPwYWci+T5mvoDbZRB1TI0LBnaeLkGtpfkWCz10fkjoFYxjBTffzDI0QIXx0a6eMYYxImoPPrtdBhHRvWhcMPDU+D7IvVCO7S0sGLjyw9VmnwvTqjC+4dmUA7sHMowRkc9gMCOiDk+tkGPSgG6Y1LBg4KuTxfjqRLHHs/fCtCpMiHaFsdhIhjEi8k0MZkTUqUQG+eGJMb0xb3QvHCusREFFPWIitAxjRNQhMJgRUafUdMEAEVFH0TY7MxIRERHRPWMwIyIiIvIRDGZEREREPoLBjIiIiMhHMJgRERER+QgGMyIiIiIfwWBGRERE5CMYzIiIiIh8BIMZERERkY8QRFEUpS5CapGRkejbt6/Xrl9SUoKIiAivXZ9c+D23D37P3sfvuH3we24f/J6bu3jxIoqLi1s8xmDWDpKSkpCbmyt1GZ0ev+f2we/Z+/gdtw9+z+2D3/Od4VAmERERkY9gMGsHGRkZUpfQJfB7bh/8nr2P33H74PfcPvg93xkOZRIRERH5CPaYEREREfkIBjMiIiIiH8FgRkREROQjGMy8LCsrC7GxsYiJiUFmZqbU5XRKBQUFuO+++zB48GAMHToUGzdulLqkTquurg69e/fGsmXLpC6l07p48SImT56MwYMHIz4+HrW1tVKX1Om8+eabiIuLw+DBg/GLX/wCnGrdNmbNmoWQkBA8/vjj7raDBw8iLi4O0dHR+NOf/iRhdR0HJ/97kd1ux+DBg7Fjxw7odDqMHDkS+/fvR1hYmNSldSpXrlxBcXExEhIScPXqVYwcORJnzpxBQECA1KV1Or/5zW9w7tw5REVFYfny5VKX0ylNmjQJr776KpKTk1FeXo6goCAoFAqpy+o0rl27hqSkJPz4449QKpWYOHEili9fjrFjx0pdWoe3c+dO1NTU4N1338Unn3wCABg9ej
RWrVqFuLg4jB8/HitXrkR8fLzElfo29ph5UeO/FAwGA7RaLVJSUrBt2zapy+p0evTogYSEBABA9+7dER4ejvLycmmL6oTOnj2LU6dOISUlRepSOq3GsJCcnAwACA0NZSjzArvdDrPZDJvNBpvNxl3p28h9992HwMBA9/uioiLY7XYMHToUcrkcc+fORVZWloQVdgwMZl5UVFQEg8Hgfm8wGGA0GiWsqPP77rvv4HA4EBUVJXUpnc6yZcvw2muvSV1Gp3b27FlotVrMmDEDI0aMwF//+lepS+p0unXrhmXLlqFXr17Q6/WYMmUK+vfvL3VZnRL/Drw7DGbUaZSXl2PRokX417/+JXUpnc7nn3+OAQMGYMCAAVKX0qnZ7Xbs2bMH//u//4tvv/0W27dvx/bt26Uuq1OpqKhAVlYW8vPzYTQasX//fuzevVvqsojcGMy8SK/Xe/zrwGg0Qq/XS1hR52WxWDBz5ky89NJLGDdunNTldDq5ubnYsGED+vTpg2XLlmHlypWcyOsFBoMBo0aNQlRUFNRqNVJTU3H06FGpy+pUvvrqK0RHRyM0NBQajQbTp0/ncxy9hH8H3h0GMy9KTEzE8ePHYTQaYTKZkJOTg6lTp0pdVqcjiiLS09Nx//33Y+HChVKX0ym99tprKCgoQH5+PpYvX45nnnkGv/vd76Quq9MZPXo0SkpKUFFRAafTid27d2PQoEFSl9WpREVFYf/+/TCbzXA4HNi5cydiY2OlLqtT0uv1kMvlyMvLg8PhwIYNGzBjxgypy/J5nFXqRQqFAitWrMDkyZPhdDrxwgsvcEWmF+zbtw8fffQRhg4dik2bNgEA1q1bx5U/1OEoFAr89a9/xcSJEyGKIh566CGkpaVJXVankpSUhNTUVAwfPhwymQwPPPAAHn74YanL6hSmTJmCY8eOoba2Fj179sTGjRvx9ttvY968eTCbzVi4cCH/XG4FbpdBRERE5CM4lElERETkIxjMiIiIiHwEgxkRERGRj2AwIyIiIvIRDGZEREREPoLBjIiIiMhHMJgRUbvJz8+HIAiYNm2a1KW0mz/84Q8QBAGCIGD9+vUtnvMf//EfEAQBO3fubN/iiMjnMJgREbWT3/72t7DZbFKXQUQ+jMGMiKgd9O/fHxcuXMA777wjdSlE5MMYzIjIJ1VVVeH111/HpEmToNfroVKpoNfrsWjRIpw/f97j3N/+9rcQBAEff/xxi9davXo1BEHAa6+95tF+8eJFZGRkoFevXlCr1ejRowfS09Nx6dKlZtcQBAH33XcfjEYjFi1ahO7du0Mmk7V6+HHp0qUICQnBq6++ipqamtuev3PnTgiCgD/84Q/NjjUOCaenp3u09+nTB3369EFVVRWeffZZ9OjRAwEBAZg4cSK+//57AEBRUREWLFiAiIgIaDQaPPTQQzh79myr7oGIvI/BjIh80smTJ/G73/0OGo0Gs2bNwq9+9SuMGjUKH374IRITEz3C0zPPPAOZTIbMzMwWr7Vy5UooFAo89dRT7rYDBw5g+PDhePfddzFy5Ej88pe/RHJyMj744AMkJibiwoULza5TVlaGsWPHIi8vD3PnzsVPf/pTBAUFtep+QkJC8NJLL6GkpATLly+/w2+j9axWKx588EHs27cPc+bMwcMPP4x9+/ZhypQpOHXqFMaOHYtz585hwYIFePDBB7F9+3ZMnz4dDofDazUR0R0QiYjaycWLF0UA4tSpU297bmVlpVhWVtas/ZtvvhFlMpmYkZHh0Z6SkiIKgiBevHjRo/348eMiAHHmzJnuNqvVKvbp00cMDAwUv//+e4/z9+zZI8rlcjEtLc2jHYAIQHzqqadEu91+2/ob/f73vxcBiOvXrxfr6+vFqKgoMSAgQLx69ar7nJ/97GciAHHHjh3uth07dogAxN///vfNrtn4PT755JMe7b179xYBiLNnzxZtNpu7/fXXXxcBiMHBweKvf/1r0el0uo89++yzIgDx008/bfU9EZ
H3sMeMiHySTqdDaGhos/bJkycjLi4OX331lUf7f/zHf0AURaxatcqjvbEX7ZlnnnG3ZWVlIT8/H//5n/+J4cOHe5w/YcIEPPLII8jOzkZ1dbXHMZVKhb///e+Qy+V3dU9+fn744x//iNraWvzxj3+8q2u0xvLly6FQKNzv582bBwCw2+149dVXIQhCs2PHjh3zWj1E1HqK259CRCSNnTt34q233sKBAwdQWloKu93uPqZSqTzOnT59OgwGA9asWYM//OEPkMvlsFqtWLduHaKiojy26MjNzQUAnD59usU5XFevXoXT6cSZM2cwatQod3vfvn0RHh5+T/f05JNPYsWKFVi5ciWWLFmC6Ojoe7rejUJCQtCrVy+Pth49egAAYmJi4O/v3+KxoqKiNq2DiO4OgxkR+aSNGzdizpw50Gq1mDp1Kvr06QN/f38IgoC1a9c2m6Avl8uRkZGBP/7xj8jJyUFaWho+++wzlJWV4fnnn4dMdn2AoLy8HADwwQcf3LKG2tpaj/eRkZH3fF8ymQyvvfYaHn74Yfy///f/brpg4W61NOetsffsVse4jQeRb2AwIyKf9Ic//AF+fn747rvvEBMT43Fsw4YNLX4mIyMDr776KlauXIm0tDRkZmZCJpNh8eLFHuc1BpQvvvgCaWlpra6p6RDgvZgxYwaSk5OxceNGHDp0qMVzGoNk017CRlVVVW1SBxH5Hs4xIyKfdP78eQwaNKhZKLty5UqLKyYBoGfPnpg+fTqys7Oxf/9+fP3115g6dWqzob0xY8YAAL799lvvFN8Kf//73wEAL774YovHQ0JCAABGo7HZsSNHjnivMCKSFIMZEfmk3r1749y5cyguLna3mc1mPPvss7ccdvvZz34Gu92O2bNnQxRFj0n/jR555BH06tUL//Vf/4Xdu3c3O26z2bB37962uZGbSEpKwqxZs7Bjx45mCxkAIDY2FoGBgdi8ebN76BUAiouL8eqrr3q1NiKSDocyiajd/fDDD802R200cOBAvPTSS/j5z3+On//85xg+fDgef/xx2O12bN++HaIoYtiwYTddRTht2jT07t0bly5dQvfu3TFjxoxm56jVanzyySdISUnBpEmTcP/99yM+Ph6CIODSpUvYs2cPwsLCcOrUqba87WZee+01bN68udmGuYBrccPPf/5z/PWvf8WIESPwyCOPoKamBl988QUmTZrU4meIqONjMCOidldUVIR33323xWOTJk3CSy+9hOeeew5KpRL/+Mc/sHLlSgQHB2P69Ol47bXXMHv27JteWyaTYeHChXj11VeRnp7usW1EU6NHj8axY8fwxhtvIDs7G/v27YNarYbBYMDMmTPd20h4U2xsLJ5++mn861//avH4n//8Z6hUKqxatQrvvPMO+vTpg1deeQUzZszAp59+6vX6iKj9CaIoilIXQUTUltLS0pCdnY0zZ860+XYURETexDlmRNSpnDhxAtnZ2XjwwQcZyoiow+FQJhF1Ch9++CFOnz6N9957DwDw+9//XuKKiIjuHIMZEXUK//rXv7Bnzx707t0bq1atwrhx46QuiYjojnGOGREREZGP4BwzIiIiIh/BYEZERETkIxjMiIiIiHwEgxkRERGRj2AwIyIiIvIR/z9gfDP6IowsrAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(1, 1, figsize=(10, 5), dpi=70)\n", + "fs=20\n", + "lw=3\n", + "\n", + "ax.plot(list(range(layer_num*head_num)), cont_dict[\"0_cont\"], label=\"MI\", color=\"tab:blue\", linewidth=lw, alpha=0.8)\n", + "ax.plot(list(range(layer_num*head_num)), cont_dict[\"1_cont\"], label=\"OI\", color=\"tab:red\", linewidth=lw, alpha=0.8)\n", + "plt.legend(fontsize=fs)\n", + "ax.set_xlabel(\"Head Num\", fontsize=fs)\n", + "ax.set_ylabel(\"Cosine Similarity\", fontsize=fs)\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(10, 5), dpi=70)\n", + "fs=20\n", + "lw=3\n", + "\n", + "ax.plot(list(range(layer_num)), cont_dict[\"0_sa\"], label=\"MI\", color=\"tab:blue\", linewidth=lw, alpha=0.8)\n", + "ax.plot(list(range(layer_num)), cont_dict[\"1_sa\"], label=\"OI\", color=\"tab:red\", linewidth=lw, alpha=0.8)\n", + "ax.set_xlabel(\"Layer Num\", fontsize=fs)\n", + "ax.set_ylabel(\"MSE\", fontsize=fs)\n", + "plt.legend(fontsize=fs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1722, + "id": "462c856b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "l=9\n", + "h=0 \n", + "st_map_1 = model_outputs[0][-2][l]\n", + "st_map_2 = model_outputs[1][-2][l]\n", + "tc_map = teacher_probs[l]\n", + "\n", + "fs=20\n", + "\n", + "fig, [ax1, ax2, ax3] = plt.subplots(1, 3, figsize=(15, 5))\n", + "\n", + "# ax1.set_title(f\"layer {l}-{h}th head Teacher\", fontsize=fs)\n", + "# ax2.set_title(f\"layer {l}-{h}th head OI + map\", fontsize=fs)\n", + "# ax3.set_title(f\"layer {l}-{h}th head OI\", fontsize=fs)\n", + "\n", + "ax1.tick_params(axis='x', labelsize=14)\n", + "ax1.tick_params(axis='y', labelsize=14)\n", + "\n", + "ax2.tick_params(axis='x', labelsize=14)\n", + "ax2.tick_params(axis='y', labelsize=14)\n", + "ax2.set_xlabel(\"Token Number\", fontsize=14)\n", + "ax3.tick_params(axis='x', labelsize=14)\n", + "ax3.tick_params(axis='y', labelsize=14)\n", + "\n", + "heatmap = ax1.pcolor(tc_map[0,h,:,:].detach().numpy(), cmap=plt.cm.Blues)\n", + "heatmap = ax2.pcolor(st_map_2[0,h,:,:].detach().numpy(), cmap=plt.cm.Blues)\n", + "heatmap = ax3.pcolor(st_map_1[0,h,:,:].detach().numpy(), cmap=plt.cm.Blues)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1683, + "id": "4f3d6f38", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(16,8))\n", + "\n", + "heatmap = ax1.pcolor(context_similarity[0].detach().numpy(), cmap=plt.cm.Blues)\n", + "heatmap = ax2.pcolor(context_similarity[1].detach().numpy(), cmap=plt.cm.Blues)" + ] + }, + { + "cell_type": "code", + "execution_count": 1657, + "id": "46ef8f91", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZ4AAAEvCAYAAAB16qffAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAArEAAAKxAFmbYLUAAC7DElEQVR4nOydd3gUZf7AP7MtZdM7SUhCJ7SAtAAiVRFERRTsDVFOz7MrZzu901NPz/I7e1dsh4JiQQSpUSD03nsI6T3Zvjvz++Pdkk0PhADefJ5nn0x22js7M+/3/dZXUhRFQUVFRUVFpZ3QnOkGqKioqKj8b6EKHhUVFRWVdkUVPCoqKioq7YoqeFRUVFRU2hVV8KioqKiotCu69j5hfHw8nTp1au/TqqioqKi0M0eOHKGwsLDe9+0ueDp16kR2dnZ7n1ZFRUVFpZ3JzMxs8HvV1KaioqKi0q6ogkdFRUVFpV1RBY+KioqKSruiCh4VFRUVlXZFFTwqKioqKu2KKnhUVFRUVNqVdg+nbgpFUSgpKaGiogKXy3Wmm6PSBFqtloiICGJiYpAk6Uw3R0VF5RzirBI8ubm5SJJEWloaOp1O7dDOUhRFwel0UlhYSG5uLh07djzTTVJRUTmHOKtMbSaTicTERPR6vSp0zmIkSUKv15OYmIjJZDrTzVFRUTnHOKsED4BGc9Y16bSgKArV1dXU1NSc6aacNP8r90pFRaVtUXuOM4TVaqW6upqqqirsdvuZbo6Kyklx5MgRysvLz3QzVM4xVMFzhqg947iqOaici2zfvp05c+YwZ84cZFk+081ROYdQe7w25pZbbkGSpHqff//733zyySfe/41GI+eddx533nknhw8fPu3tkiSJDz744LSfR+V/hyVLlgCQlJSkDp5UWsVZFdX2R6Ffv368++67ft+lpqayePFiAH7//Xe0Wi1Hjx7lySefZPz48ezevZuQkJDT1qa1a9fSpUuX03Z8lf8tCgoKvIElalSjSmtRBc9pIDQ0tNFy4ABDhw5Fp9ORmZlJSkoKI0aM4Oeff2b69OmnrU1NtUdFpbVs2bIFEPlc/fr1O8OtUTnXUPXjM0BlZSU1NTU4nU6vFrJ///5m91u5ciWSJLFixQomTJhAcHAw/fv3Z9u2bVRWVnL11VcTGhpKz549Wblypd++dU1taWlpPPHEEzz77LPEx8cTGxvLX/7yFxwOR5teq8ofD4fDwfbt2wFIT08nMDCQ/Px8SktLz3DLVM4VzgmNJzc3t9ltkpOTvctOp5OCgoImt9dqtXTo0MH7v81mo7i4uMHjnQxOp9O7LEkSWq3W+7/JZMJms6EoCocOHQIgOjq6xceeNWsWd999N/fffz+PP/4406dPp3fv3gwYMIDbbruNl19+menTp3P8+HECAgIaPc6nn35KZmYmn376KTt37uSvf/0r3bt35y9/+ctJXLHK/wp79+7FarUCMGDAAN59910KCwsZNGgQl1xyyRluncrJoigKeXl5bN2
6lbS0NHr37n3aznVOCJ4PP/yw2W2eeuop77LJZGp2n/DwcO677z7v/4WFhXz88ccNHq+1rF69Gr1e7/1fq9X6CSJPOaDc3FweffRRjEYjmZmZKIrSosTZ22+/nXvuuQcQD8ukSZO46KKLePLJJwFhc+/VqxerV69m7NixjR4nLCyMuXPnotFouPjii1m1ahULFixQBc8pYjab0ev1fs/AH4nNmzcDEBkZSadOnYiNjaWwsJADBw60+BlWOftYu3Ytv/76KwDFxcWq4DnXyMjI8DNr1X0RO3fu7F1OSkrizTffJD4+HofDgcFgaPb4tYWJx1Q3evToet/l5eU1eZwxY8b4RSOlp6fzww8/NHt+lYYpKSnh22+/JT8/n9jYWO644w50uj/WK1ZWVsbRo0cBoe1IksTgwYPp2rUr3bt3V4XOOYLL5eLEiROkpKR4v+vRowe//vorOp2O8PBwZFk+bdGK58Rbcdttt7Vqe6PR2Ow+tU1fAPHx8a0+T2OEhIQwaNCget97cnd+/PFHQkJC6Nq1Kx06dKCwsBAAu93eIsETHh7uXfZs39B3HnNIY0RERPj9bzAYmt1HxUdVVRWhoaHezjYsLIySkhJAjBh37NjBgAEDzmQT2xxPUIEkSfTv3x+AlJQUvw5M5exm8+bNLFu2DIvFwgMPPOCNpo2Ojubqq6+mU6dOTZro24JzQvC01t+i0+lavU9AQMAp+3Waw2Nu69evH3FxcQQGBgKg1+txOBzYbLbTGlKt0jaUlZXx/fffk5OTw6233urtdA0GA8OHD2fr1q1UVlaSnZ1N//79/zBagCzLbN26FYBu3boRGhp6ZhukclLEx8djsVhQFIWdO3f6Rbz27NmzXdqgRrW1Ix7BI0mSn2bjWbbb7X4VDVTODjwvqYeQkBDy8/MB2Llzp9+2o0ePZuTIkQAUFRW1S3Jwe3HgwAFvbcHzzjuv3npFUcjPz6eysrK9m6bSBIqi+FWWSEpK4vLLL+f6669nyJAhZ6RN54TG80fBE6psMBj8bKcBAQGYTCYURWmxn0fl9GK329m/fz87d+7kwIED3HTTTaSmpgLi/mVmZmIwGBp0wPbr14/ly5djNpvJzs7+wyTuesxsISEhdOvWzW+dLMu89dZblJaWcv755zNu3Lgz0USVOlRXV/P999+TlpbG+eef7/0+IyPjDLZKFTzthtPp9EazeUxsHmoLmpb6eVRODzk5OWzYsIF9+/b55TTt3LnTK3iAJqMF9Xo9gwYNIisri4MHD1JcXExsbOxpbffpprq62ptrlpGRUc/prNFoiIyMpLS0lD179jB27NgzYmI8ePCgd4LC9uRsjObbtWsXCxcuxGKxcOTIEbp160Z8fPyZbhagCp4255NPPmnwe5vNxtVXX83VV1+N0Wj0W6fRaFrk5xk9enQ9U1xaWlqD5rm639X93xOZVJtnn32WZ599tsFz/y+Qk5PDJ5984vdbhYeH07t371Zn5w8ePJjVq1fjcrnIzs7m0ksvbevmtitbt271/i6NBUykp6dz8OBBSktLKS4uJi4u7rS2ad++fezatYvLL78crVaL2Wzm22+/xW63M2LECEaOHNluUYX79u1j48aNXHjhhWe8c7darSxatMib5AuiWkprcgVPN+ek4PHYLOtGpp3NeKLFdDpdgy+DwWDA4XB4/TztMXqyWCxYLBZCQ0PR6/XemUX/qPknTSHLMj///DOKoqDX6+nfvz99+vShY8eOJ3UvQkJC6Nu3L1u3bmX79u2MHTu23oDjXCMwMJCEhIRGO7CePXvy008/oSgKe/bsOa2C5/jx43zzzTe4XC40Gg1Tpkzh+PHj2O12XC4XWVlZ7Ny5k0mTJp12U6fL5WLp0qWUlpZSUlLCPffcc0aKplZUVLBt2zY2bdpEdXU1IAZOU6ZMIS0trd3b0xTnnODxzGMjyzJxcXEn1Sl4NAutVktQUNBpaKU/six759ypa2bzEBAQ4HXKWiyWeuY2jUb
Tpg+zoihUVVXhcrlwOBzExMRQXl6O0+kkJibmD5d/0hwbN270hrVfeOGFDB48+JSPmZmZydatW3E6nWzcuJFRo0ad8jHPFCNHjiQzM7PJGWeDg4NJS0vjyJEj7Nmz57Reb0JCAmlpaRw/ftybutCjRw/uvPNOFi5cyJEjRygrK+Pzzz+nT58+TJgw4bRFjCqKQu/evflog4XIjoOocWgIlOysWbOGkSNHntYBss1mY/fu3Wzbto1jx475revXrx8TJ05stM85k5xzvYvHAQ9CCLVWcMiyTGlpqVdjqr2/w+HAbDaj0+kICgpqs46+drRaY/HxeXl5TY7MnnrqKZ5++uk2aQ+I3zEoKIiamhpCQ0O92haIcOGYmJj/mVL3JpOJFStWAKJDGzhwYJscNz4+ns6dO3P48GE2bNjAiBEjzmmBrtfr6+V+1aVnz54cOXKEwsJCysrKiIqKOm1tueaaaygpKSEhIcH7fXR0NDfeeCM7duxgyZIlmEwmb4BI7969URQFRVFwuVzIsowsy7hcLm8S7Mmg0+mg0xiytgO5cHi+zMXmuZSeOExpaSlTp05tUwuGLMscOXKEbdu2sWfPHr+qKACdOnVi6NCh9OjRo83O2dacc29BYGAgWq0Wl8tFTU0NgYGBrbqpZrPZG1pYtxNwOBzeEV1bjhI8Zra6YdS1SU5OZsmSJV5TV90XPDExsc3aA0KDCgsLw2g0otFokCSJsLAwqqqqcDqdlJWVER0dfdY5TE8HS5cu9d6jSZMmnbLArbJBgBYCdELrOXz4sLcD9CRdthft7fROT09n0aJFAOzevdsvkqohtm/fzuLFi4mJiaFXr1706tWr0fyggoICP21cp9P5CR0PkiTRr18/unXrxrJly9i0aRM2m81b6qchTlboAFTb4NFlvv93FGso0V3CGOkDdu7cSXBwMBdffHGb3Ie9e/fy888/e01pHqKjo8nIyKBfv35+yeQNsewwLDoEkYEQb4T4EIgziuU4IwS3g6X9nBM8nknUqqqqvKP0lmbZyrLszUMwGAxERkb6rVcUBY1G4/3bFiiKgs1mA4S209jDZzAYGDJkCCaTCUmSSEhIaJcOo7YZwGg04nK5MJlM2O12KioqiIiI+EMLn9zcXG9SZP/+/U95bpktBXDtfEiLgO+vFh1aTEwMJSUlZGdnk5GR0W6/pyzLzJkzh+7duzNkyJCT0rY2bdrEsWPHGDBgAGlpac22PTQ0lI4dO3L8+HH27NnTpODZvn073333HSACO3Jycvjll19ITU2lV69epKene4XQ8ePH+fzzz+nYsSNXX311i/yQQUFBTJ48mYyMDFasWEFVVZXXZK3RaNBqtd7lk0mGra6uxmKx8NqOOPJFt8KIjrD6OOQ7o1gaOZNx5R+xfv16goOD28T0GBYW5hU6gYGB9OnTh/79+5OYmNii56rSBnf+DDZX49uEGoQASg6DOVNOuckNcs4JHhC25OrqahRFwWQytVjw1NZ2QkJC6gkXo9GI0WhElmW/m2g2m3E6nX7lUVpDeHg4Vqu12XZ6tLfWCNLS0lL0ej3h4eEtapvL5UJRlAY7IY/W43K5sFqtWCwWtFotYWFhLWrPuYYnoADEoGD8+PGndDxFged+A4sT9pTAooMwpadEZmYmP/30E4WFhRw5csSvVt/pxGP3P3bsGBs2bGDcuHH07t27xc+woiisX7+eoqIiSkpKuOOOO1q0X3p6OsePHycvL4/KysoGR+DV1dXeuoDBwcGEhYV5K8p72rxo0SJSU1Pp0qULq1evxm63c+TIEfLz81tVoqdjx47cdNNNLdpWURQ2btxIUFAQffr0aXLb5cuXs2hnJYtCxLEv7Q6vXwzP/gYfbIEiOYrF4bdxUeVHrFy5EqPR2GAprYYwm81s2bKFHTt2cOutt3r7hMTERIYOHUpqairdunVr9WBi4X6f0IkJglIL1I2JrbaLj9VZb/c245wUPBqNBqPRSE1NDVarFafT2ew
NUBTFq+3o9fomO/faAskz8gdhiouIiGiVs1CSJAIDA1tkugsICGhVjSSNRkN4eDgVFRWUl5cTGRnZbKdSXV2N2WzGaDQSFhZWb3tJkoiIiKC0tBSHw0FNTQ1arfacj8hqiM2bN3srEIwZM+aUr/G3HFhfqy7rp9thSk/h5N2wYQN9+vTxm4qjrTlx4gTV1dXesiexsbEkJyeTm5tLRUUF8+fPZ+3atVx00UV+OUmN4XK5SE1NpbKyssFKBY2Rnp7unRZ7z549DU5CGBoaylVXXcXChQu54YYbiI+Pp6ysjF27drF79+56QgjE8z59+vTTWhduyZIlZGdnYzAYSEhIaDQfqKCggA1bd/F76J8AYbb6+yiQJHhipDC1vrkRyojkl7AZXFT1CQsXLiQoKMgv6VhRFOx2O2azmZCQEK8mV1ZWxtKlSwExgKhdYeDiiy8+6eubt0f87RENi68HlwKlZig0iU+RCQproMgMxtNocjsnBQ/gFTwANTU1zTo9a2s7rdFctFotBoMBu92OzWajpKSEyMjIsyLJU1EUrzYGNBti7gme8Ozb2G+g0WiIioqipKQEl8tFZWUlGo0GnU7ndc7KsnxW/AYniyzLrF27FhBBAC2OYrOXQ94vkDgJDL6RvKLAv9f6b7o5H3YUQt94PbNmzTqtJrZDhw4xd+5cZFnmhhtuIC0tjeTkZGbMmMHu3btZtmwZ5eXl5OXl8cknn9CjRw/Gjx/v7VitVivl5eXez6BBgwgICGDSpElceOGFrWpLREQEHTp0ID8/v1HBAyIQoUuXLt7ONioqipEjRzJy5EhKS0u9QqiwsBCNRsOVV1552h3mvXv3Zv369YSFhTVZvmrp0qVsDhxLtVYET/x9NESb10HxMaSUaTw8XMKghVfXQYUUyS9htzKh+hO+/fZbNm/ejMlkwmw2YzabvYnlN910E506dQJEWZsOHTqg1+vbLEDjSDlsEuMsrkwXQlInCR9PfDuXiDxnBY8nIq12LkpjnW5tbUen07VKq9BqtURHR1NVVYXJZMLlclFaWkp4eDjBwcFN7ut5cE+mw2lqX4/QkCTJa2ILCQlpVhOrqqryHrM5m7ZWq/UKH0VRKC8vr7dNXR9Zbm4uUVFRzf4uZwMajYbbbruNZcuW0a9fv5b59JwWWDIcqvZCWA8Y/QuEpAGw9AhsE9HY3D8U3tkkTG6fbod/X3hyz0BrqB2dabFYvMuSJNG7d2969uzJhg0byMrKwmKxsG/fPvbv3098fDwVFRX1qpJ36tTJG9ByMnld6enp5Ofnk5OTQ01NDSEhIRw4cAC9Xu+XU9LYsaOjo7ngggu44IILKCsrQ6PRNDu4bAuSk5O59tprSUlJaXRgdejQIdYctbErZCgAF3aGyzoWwI9jwGUBaxFSj7u5LxMMWvjXGqiSIlgUcisX13zaaP0+z6AQxH275ZZb2nRwN3+v+KtBZsrRoRD/N0g+M4nN56zgAeGn8RRwNJvNjXamFovFO6o4GT+Np4PX6/VUVlaiKAoVFRU4HI4mO3yr1UplZSUBAQGEh4e3qHNzOp1UVlZit9uJjo6u9+ApikJZWRkGg4GQkBBv2+pu42l37bZ4ghyMRmOLzIWe0VZjUxrXHhHu3r2bb7/9lsTERG666aZzImw4ODi4dRUFtv5VCB2Aqn2wZBiM/hk5cgAvu7WdDiHwp0HCbPHlTvhhHzx+PkTWivovKioiNDS0yVQAT2JkS5/VxMRErr76arRabYPJglqtlszMTDIyMvj9999Zt24dLperwZl6NRpNvaip1tKrVy+WL18OCHNbVFQUc+fORZIkrr322lb5uU5XSHZjNBXhJssyi5Ys5/fgK0CSCDUo/HOMhHTkEyF0AHb8DdKuhYBo7hoshM8zv0GNJpzF4TO5LXwZaWFOgoODMRqNBAcHExwcTFJSkt+52lLoyAp86zazXWD4hXjbRsi6HM57BXrcK9SfduTs7x3q4jSDJR+CEtDrjV4zmMl
k8nbEtVEUxfsS6XS6UwqTDg4ORqfTUV5e7o3+MplMBAUFYTQa0ev19Tp7WZax2Wwt7kA0Go1XQDRUt81isWCz2bzHrJsUVzsx1OPz8XznOX5rEukCAgKIjY3F6XQiSZK3M/Qse8jJyfF2ZIWFhfVeorOFkw4vLlgK+/8jlkO7Q/V+sBbA0lH8nPY7e0pESZ17hkCgDm7qJwSPzQVzdwlh5HQ6+e9//8uhQ4cYM2YMF1xwgbdNJSUlHD9+nNzcXE6cOEFRUREGg4Ho6GhiYmKIjo72++j1eo4fP+7n72hJhn5QUJA3QXbt2rWYzWYiIiKIjIz0fsLCwk45qjM6Opq4uDiKiorYs2cPI0aM8D4358KgxIMsy6xatYo+ffoQGxsrwr+relIZKEyUT14gEW+U4ZBv4kfs5bDjaRj0OgAzzwO9Fv62EqqVYL5VLmXhJAg7vVPe+JGdCyfcY4mrAj51f6vA5vuh+gAM/D/QtN99OXeeAADZhbPqIDWykXDykUK7EhISQllZGbIsY7FY6pl5ams7DQmm1mIwGIiJiaGiosIrIDzmPr1ez/33388XX3xRb7+XXnqJmJgYbr31Vu938fHxZGZm8uKLL9K9e3dACIbg4GA0Gk09oePxt4AQog05w81mszcXyRNwYLFYvH6g0NDQVncqLZnG+aKLLsLhcDBw4MA2zzlqS7Zs2cKhQ4e46KKLms138GKvgGz3fQuIgfFZULgcsm/G6TDzynpxn1LCYVovsVl6LAxJFMEGn22H288T98zjZ9yyZQsjR45EkiTeeecdioqK6p/Wbic/P98bAFEbj5l52rRp9OrVq9W/Q0REBBMnTmz1fq0hPT0dh8NBUlISnTt35rrrrkOSpDYPDigywYY8iAkWYexxwW0zgFcUhS+//JJDhw6xZ88ebrnlFr5avpMdAdcBcH5Hhem9JChcCTWHxE7ByWDOhQNvQ9c/QYQIJLg5AyTgyZWQUwl/XQZvTmw/RWO+W9sJkyq4MOB76P0YHJ8vNPcDb0HNETj/v6BvnwjWc0vwaLTkKV2ocBkJs1SQFGAhICAQnU6H0+mkpqaGoKAgr3Cp7dtpy/I4Hr+PJ+HUY+7z5BX16tWL//znP36CITU1lcWLFwPw+++/o9VqOXr0KE8++STjx49n9+7dXk2kIVu2oiheMx/QaPh0UFCQ16zmcRh7Kj3odLrT5n/RaDRnfSFMi8XCsmXLMJvNVFRUMHPmzJYNRDb+RXQmAEPeg6B4YUoJjOf7xXM55BJRZPclL0SvmYToYkRnsz4Pcqth+VHhCzj//PNJSUlh8ODB3nPHxMR4BU9YWBjJyckkJiZis9m89b/Kysr8MtQ9fpwNGzaQnp5+VuZanX/++YwaNcrbNo/jvC1QFNhcAJ9sFWHrDt90MwTrIS1cCKFOEZDq/ts9GiJaYfCQJInu3btz6NAhiouLeevd91nM1SiShkCtzAvjNUJwHHpf7KCPgLFLYVF/cFmFNjFmsVe63JQh/IDz9sDCAzAsGW5sXe3Zk8Jkh58PKIDE5IC5BIZ3gr5/h/SH4LcroXAF5C+CX8+HUT+B8fTPJnvKgueKK65g5cqVjBs3jnnz5rVFmxrFKYNZER1nlRKBqcJFh1CRUFpZWYnT6cRms3nNaZ5Qazg5305zeCoMhIWFYbFYvJqG0WjkvPPOq+d89zB06FB0Oh2ZmZmkpKQwYsQIfv75Z6ZPn97ouaxWq9cBbDQaGw2Q8JSnLy8v9wofD6fjN2iKvXuFP6S9ZjVsDo1GQ58+fdiwYQPjx49v2W+RMw+Ofi6WO90MHa/wrnLEjuU15wgAumj3MKXwMtj0ZzjvVdBomdBFJOIVmeDTbULwdO7cuZ5/Y8iQIfTu3Zvk5ORGc6Y8Aw+PICotLcVgMPh17Gcbp8OkZnXCj/vhk22ws76SCIDZAbtLxKcuKeGQEQ/94qFfHPSJg5AmXCmDBw/m2LFj7N69m99t/SgPEpUSHjtfomMYYC2B49+
KjTvdKIJO0h+Gnc9Awa9w4ic/B/4zY2BrIRwsg39kwYAE0YaWsvo4/HoYZg4QCZ4t4ZdDYHaKZ+TKwE+h/wvCrGaIFAEyG/4Ehz+Gih2weCiM+hGiW5ZvdLKccnr+vffey5w5c9qiLc2i00C3KIkYfTWg4FK05FZBoT0YRSNMQbWLGHqWT3cxUE9eUWxsLAEBAd7/m8MpQ78MUWK+boG/2siyTGVlJWvWrCEpKYmNGzcyYcIEgoOD6d+/P9u2baOyspKrr76a0NBQevXqxbZt2/yE0zvvvMOwYcMICQmhY8eOzJo1y8+B/MUXX6DT6di4caP3u9WrV6PVapk/f36rf5PNmzczd+5c5s+fz/Hjx1u9/+kgICCAiRMncs8997Rs9G3Jh/WzxHJwirCD12LeHsipEb/xAzHvo5Vk2P86rL4GXFb0Wrihr9j2txw4WGIDpwnsleCyeY/jydRvKlHXk1/VpUsXhg4dyqRJkxg/fvz/TCXxE1Xwr9WQ+SE89KtP6IQY4JYM+Pk6+G46vDoB7h0Cl3UXAqauHyWnUgiuf/4GV8+HPm/D+M/gwSXw9kb4v3Xw3O/w+HK4fzHMWijxnWYqv0TcwbZA4ZPrH2Pjxgy3sD/6GciixiFdbxd/e82GILePc8uD4LJ7zx+sh7cmCj+g3QV//lmU3GkJn2+HG76Dj7fCbT+I/VvCvF3C4tFJu5+BHbSQdJlvpdYAQz+EjOfE/9YCWHoBHF/QsoOfJKc8JBk9ejQrV65sg6Y0Tm5urv8XsoPgqh0UKfHYlQDKAAmFUK2d6BCH1wwhy7I3DNMzKqyoqPAmRdZO5rPZbBQXF3v/T05ObnU7JUlCq9Wi0+nQaDR+U13XjSKzOuFQGRw6IAROXJx/zSmz2YzFYvH6Y2pPXXvnnXdy9913c//99/P4448zffp0evfuzYABA7jtttt4+eWXueaaazh27BiSJOFwOKisrOThhx8mOTmZvLw8nn32Wa6//npv9vj111/P3LlzufXWW9m0aRMul4sZM2Zw1VVXceWVV7b6t4iLi/OaQL/66ituu+22MzIfiM1mw2Qy+UVGtSgsV1Fg3Uywl4n/h33il7djdYpOCiA9BiZNeQJ+2wDFv8PxefD1AkDhWlcMr5ODAwOfffsufw+5V+ykDYSRCyBxQhtc5R8XpwxProD/7hKRWR66RApT5tSeEFpLuJxXJz9XUaDcCofLYXcxbC+EbUVwoFRk7CvAgTLxaRwt0AEk0EsuXpsUgEZyH9wTVBA9FCLcowydUWgVa28Ujvv9r0P6g96j9YiBf4yGR5bC0Up4dLmoeNCY4qoo8NJaeHOD77u9pfDGenhgWFPtFgJ77QnR90wNmIN03kv1TyRJ0PtRCOkCa28S0Xm/TYUBL0LPB0+LI6pdfDwffPABH3wgblBDTtTm+PDDD0/63JmZmUyY4Hu5161bR3Z2NuHh4dx3333e7wsLC/n444+9/z/11FNNHtfiEC9FaAMWr9WrV/uNRLVarZ993ul0kVMNRw4f5Z9//TPBxhBSzxtHmUVkQEuSb+qG2nhMiLfffjv33HMPIEwwkyZN4qKLLuLJJ58ERImQXr16sXbtWu9Mmf/3f77RusvlIjk5mVGjRvnNjvnOO+/Qu3dv/vGPf2CxWCgrK+ONN95o8ndojOTkZK688kq+/vprLBYLX3zxBbfddlu7VkCwWq188cUXVFZWcsstt7QuLPfQ+5AnyunQ436IH+O3+qudeOtzPTgMNIFRMPZXWHO9ML0o4n7HaQqZGDCPH2zXMc96Cw8HP06Ipkb4ALJvhkk7IPDcnp30dPLVThEdCMJzNr4z3NwPzk9pWX8oSRAVJD6DasW8mOyws1j4XHYUir/Hq4RGEqwXWftGg/hb+/9Lu2np5LGgl6yFyt1i2aPteEi7Dva/CaXZsPMfwgwX6LOpTe8Fa3Phu71CAxuWDNf3rd9+u0sEIniCA5LDIDZY1AR8cyNM6Aq9m3h8vt1ehoJ
47qd2KoGYhpN5AUidLvw7qy4DWzFse0xoR2HdG9/nJGkXwTNz5kxmzpwJ0GgW87mE2SG0FQVIDpWJCrALdVt2gMtCRt9efPD6s6C4QNIgaQ3CbOMQpq2gIJ+HMyExmRfe/oqouERyq6DEDImhInquttnQE+0G/tMue0JoR48eXe+7vDxf/ZZVq1bxxBNPsH37dm9oNcChA/uIDdOALpTExERee+01Zs6ciSzLfPnll6c0ZXPPnj25+OKLWbRoEeXl5XzxxRdcc8017VL7zWw289lnn3nzVDZv3tzyWmzVh2DzA2I5vBf0f85vtcXhG31mxMN4j9VOGwgjvobcb0WUkKQFScvNFXp+WAc1ShjfdljMTQkrYdvjYC0U9vXz57V7HsW5QIXVVw2iZzS8f6nw0bQFRgMMTRIfAMq3ixytjlNbHlbsCSrQhUDK1f7rJI0wzS4ZCo4q2PYEDH3Pt1qCf44RGtihcvj7KuHv6VXrdat2F/T8LUf83ycOPr5MaNsXfS4SlB9aAj9cI8K166IoMH+nGLwO068gecgDzV9TTCZMWAerJkOvv54WoQPnSFTbbbfd1vAKS74IdQWU0K5U2PWUmRUUd1RRsAG6xPnnrAwdOpTePTqhVawgO70PWXx8fOPnqYVLhuOVvnPkV8uEWvahR9hRcZoICdYzqE+dKsfmE2AT3s65P69C1hqJj41kZGcnFl0i+VYFq1PC6hRmgVBDIDq0aBGG3NoJqLXDgD0h1w195wksOHLkiNcn8NlnnxEfH8/xY8eYdvXVWEv2QE2g+B1CezB58mT0ej0hISFMnTq12d+jOYYMGeL1T+Xn5/POO+8wZcoUb/j46aCmpobPPvvMq10PGjSIcePGtWxn2SXMDU4TSDoY9pkQKLX4dDsUu5PMHxpWR2ZotJAyzW/7gQr0Pgy7iuHTvOHcOHY4kikHDr4rtKMjn0HnlhWx/F/ilWwhfAD+MabthI4fpRuFRnLiR/F/5xnCkdQc9ko4Nlcsp14L+gZy42KGQNqNwg906APofhdE9veuNhrgrUlw2X9FvtddP8NP1wq/VWEN3PKDMA8CjEqFtyeJfQBmj4CnV4kAirc2wr1D659+8/59HLGJEkNXpeZC2Jj6GzVESCe4eDNoT1+i0TkheBr1t7hiRSQGQKCGjsZkSitNFFr0OCVxh4plCHL45piICIYIl3vEX70fQnuARktAQECL/Dp51Qo2l6+ncaHjhCuVVO1BtyiSAI14ECWd0Hpku88BCXTtNxydTkdH7WF0zipCnVWEaIMo06dSaDPilCWq7RJo4wiSTUQGOOoHR7hsIlGt2l1+w14lOk1N/aHPkiVLcLlcfPPNNxh0WrAVYcrfJnZTdByXUwmWTURV7+fee14gISGB0tJS/vnPf7bJ5HPjx49Hp9N5y7V89dVXZGZmMn78+DafnbGqqoo5c+Z4qy1kZmZy0UUXIUkSn2+H9zaLOlX3DKkjMGSX0EAOvA0la8R3fZ+CKP/imNU2eMcdfzEkEUa2IPJUkoQ/4pGlIpppbS4MH/BvkZRacwg2/QXiR7dLGKsfnmu2FYO1qOG/khZ6PgCxI9q1aftKhDMdYHK3WppJQ+TMg90vQOR5kHwZxI8DXTPBRCXZIvLMY071cPgjYVbtdEPT+x/70lepoK6ZrTb9nxcasNMEm+6DcSv8HryeMaLO21+XwZEKMa/PX4bALd/7Ej6n9YLnx/prNTdnwM8HRLj+6+vhos4id8yLojB/3X6gB0GSiYkXtLKw6GkUOtAGgmf8+PFs27YNk8lEcnIy33zzDcOGNePxaiu0ARAQDbZSoU0EdiA0SI/NXIpJCsdCMHaXUGUTQyBKX4NUU6tOktMMNQchtJtQjZuhwqpQbhUPTYRUhk4rUeKMpEqJoDKwLxFBGtEeXTmE1QkhVhRcQVu9/wZrHUQEGcCuB9mB5LIQ7dpLhDaAIl0nSuxGFCQsmhAcLgW9FV90TPVBcFfMxul2NFjzoWK
rSADT+4dxWywWdDodWnsxmIpBdvL1ApFTdEJOIUGOpZxYvv5pHl9+NZflSxez/+AR7r77bq688kr69m3A+NwKJElizJgxpKam8u2332IymcjOziYnJ4crr7yyzUqilJeXM2fOHG818ZEjRzJmzBhcisQ/VomQZhAjafuJZTwc/yGYj7s/J7x+GQCiM4WpoQ5vbhDOaoCHhrfcQnZ5DxEtVWEV7RjeMURoU0vPF6aY7FtEDkgLnkMAKnYKwRA3+uTMdDnfwKZ7hdWgOXIXiLyPXo82OLBpaxQF/p4lKicHaOGxpuaSq9wrnPguK5RtEuYvbRB0uEj4JxIvEXlXHopXw45/QMES33e6EOj+Zzj2NZiOCPNn1CAIbyIN4KDbzBaRIbZtjOAk8bttfwKKVomkzZSr/Da5prcYjHy/D37YD4sP+aYuuG+o+NS9xRoJXhwPF38pTG8PLYUF033CyZr7Kz9WiB9uYvwhjKHtkDDUCk45nHrp0qUUFxdjNpvJzc1tP6HjIcgdxqIoYC3AYDAQHxdHl9hAOoaJG6QoYvSQW2HHpSDuoidCyVEtRp2K3OgpAGxOOFElwmoM2EgMKCE+MhyD+0bnmQNwKk2EtkoSVXbfS9shTI9kTBaRMMY0rzlHq9joIO+lu2434Tphz3HKEscqoaDKHWzg9BWBRFtrZKcowgRgOir+txSApZDRmX0xm8386c4/s3TFah595g0WrxROClkRbaqqrOCpR+5l+i130bHPMG64+RYuuOACZsyY4a38cKp07tyZP/3pT948lry8PN577z127tzZ6mN5atZt27aNn376ibfffpv//Oc/XqEzZswYxo4dS7VdYsYPPqHj4Y3j4/jP7s4iCs10zF/oBCXCsE/9bP2KIsJ5394k/j+/YzOj8DoE6uBqdzX8JYdFtBGxw3zCrXAF7PtPCy5chl0vwKIMWDYWlo6C8q0tb4ilEH67CttvN/B/JbfxnvkB7H7PrSSqM4T3grhRoA8X59z+JKy4EMx5jR66IZYfEWHKJnvz23pYfEjkqwDcOQiSGnMJuuyw9gYhdDQGMLgHMC4L5H4P626D7zqIwq47noFl40SSpEfo6MOgz5Nw+VERhXb+XNDohXay+mr/96w2ZZugfItY7np784K/5wPiHQfY8rBoby0kCZ4bC50jxP82F2gl+Nc4uD+z8cN3ioSH3d3tziJ41/1sIrtYuuZXqhQxAL1ySLem23cGaJtpNs8k2kAIcD9wtmKQ7d6ZBSODoGskBGiFwCiXozjk6ok1qBuEdBUJVODurI+J3qUBFAWOVzhwKRokFDrqT6AL7YxWoyHJXZfUKUNeTePNtDigxv3yRQT6bLVIGgiMgfDeENoV9OKAAVhIZTdp2gPoEQLHpIh15VI8SnBHIbTC3A9VcAoEJfiryC4rmI/Tv1s47/z7ryxekc3lNz7MtgPFPP0fkRQpIWznbz1zPwaDgfueeAGTEsLBCgN/f+Vd9uzZw0svvdSye9ECQkJCuOGGGxg3bhySJGGz2Zg/fz4//vijt8KCB0VR6s0nX1BQwNy5c3n55Zd5/fXXWbBgAZs2bfKLlrzwwgu54IILyKmEK7+BVe4UqWH65SyNTKeLViS2vmx+lnf0n0D3u6H/v2D4lzD+N5i8z8+p6nDBg78KWzpAaji8cBJzxt3QV/zesgJfeGRtn6cgUuRysfWvviiphrBXQNYVsO1R30Cp+Df4ZSCsv1MkMzaGosDRr+Dn3lhzfmJW1be8Yn6Gf5peZppcQs7IvTC1EK5xwJXFcMkuGL8SJm4RocIghOOiDDixsNlrVRQRbn7rD/DCarhpQcvyVaxOMZEaCCvFnwY2sfHOfwghACIPZWohjF8lOvoQT906RUSf7fibKHMEosJA36eFwOn3D2GlAIgeDP1fFMsV20XlgYbwaDvaIEi7vvmL0gXBAPc7ZDoqAkvqDHRD3P6eiECx/MGlcE3T89ABcGt/GOgee//fethfChz9nHl
lowFIDKxheNrpy2E8WSSlqUknTgOZmZlkZ2c3uG7v3r0nl+XutEDlLrEcGA/GWo592Ymr6gC59ngqFSGgNJIIS4wIkIXZyuH2+QTGQXDHekOMgkorRVahkcRrC4iPjBKJV25yq6DMPThKi6iftKYown5bYxfn7h6NV1Nq+HpMQluxi6kIXGgoII1SZySecixGvRgJBtY1liqKEDj2cnBUCHMiiNyCoEQq5TByKiU8il9quK+9sgIlVTUUWYOQEQ2UUIgOlogzigTeupz0PUMUFp0/f743yu7666/3VgZet24dv/76KwCPP/64Nw+roKCAd9991+84RqORlJQUkpOT6dKlC/Hx8WzMgzt+EjMsAlwd+AHPhtyFofcDFKQ9xfQFQRyrFOueHiVe4IYw2UVkkUd49Y2DTy4XdcFOhhk/wLIjIrx37Qz3/avYJYSHbBN+iovW+j1fgNBqfrvKVxMscoAwJe39t3heQHSo/f4B3e70j8yyFMCGOyF3AVYlgDsqv2OVw79OW5gBXrwQJjZUmFl2wPa/CT+Kh54PQMbz9duJGIQ9sUKEQtdmQIKYSrmp4phvrBc5KyBqmU1uLAaleA0sHSk68Pgx9c2UiiKi1E78ALk/COFjiBTt7n63X06WH4oCWVPEfgAj/guptSLWnCb4tgM4q6HTTUIzbgmKAsvGCHMbiHs37BPf4NdNlU30DfXe6yY4VA4TvxCaUkZUNe8FjWX4ibW40PHnQTKPjDhz+kVj/f0fQ/CACIG1l4uHL6KvUJkVWSRwOapRgFJtN/Jt4d6pXsMCID7YRZBlv+/lDUqEYF/Af43FwuGqQEDCKNXQOUqLVMdx6ZLFSMMhi865e7R/J11pxdvJxRtbMemSyybMBrpg0Bgw2YXJ0DMlrSSJkiyxwUKgNXoMRQZtIGVWiVy3jNVIQkg2VC7EaS6m0CRTJseiuJVirQQdQn15Rh5O6Z4h/E/ff/89YWFhTJo0yfv9xo0bWbhQjKwfe+wxb16ULMt8/PHHxMfH07FjR1JSUvwShEHkRjzyq4JdlpCQecz4MLeHvI007GNvJ3KiCqbPE3XUQJg66uZRlJjh1u9hu1uZqhtZdDKsOiZG/yDu2wWpIkBhpPVtYnbfJVb0fgIynvHtdPgTITg8JpouM0XlY22g8E1tnQ1HaxWmDe8jQnnjx8DRL0Xwgr0cqxLIzJol/GYdCcCELkKQvpLtS868OUP4VBrs+PKXCH+K1f2DRA0UHXOoT1qZHXD3IiFcQSTX9owR9wRE+PlnV0B4A8KnoAbGzBHHGJoEc69sxMzkqBb10GoOC1PgpO3NB2Y4asTv1ZJQaVuZOL75OOhCYeJm3zUe+hjWzRDL47MgbmTzx/NgOi6mIvCY6YydYOQ34nc8GRQFKndCwVLe3R7Gc/kiKrendjt7XcKns/wmkWx7pvjjCx6n2WemCEoQJStqDnu1BgJjITgFk0Mip9K/qGBEgEy8cogA2S0djB0hMB6nw8qBMg0ODGhx0i3ChiGg4QTIKhscrRDLUUG+OkqyIoSS3QV6jZhytrHi0HXNSrXRaDRoNBpkRYTyFpl8lkGdRgiQEAOEGhqO6S8xQ567k9VqRNHE4CZcUlgKsJmKKZCTvJoiiOMnhfk0tlMVPCBMarLTjtZZJjRWSUNhYSEHDhwgMDCQ/v37t6jul6zAq9nwn/Xi/yBM/CfsOi6K2g4XLIDIDL/tcyph2jzR4YGYsM1TXfpYBdy4wDdgmNpTOHMb+m1bg6zAJV/5wmRr0ztwP6Ok+YwMWMqgi5/DEJMhCpR6suO1gTDoLehya/2di1fDxnugfLPvu7Ce3vmDLEoQMxxbWFMpwmsndhXZ8notrD8Bf/nF9zv0jhVmn7SIBi7AUiDCzQuENoouRPipYs+nNHgQt/1sZIt7ip/hyfDuZPFcPr7clwjaN07h87H7iKj4VZi/yjZB8hTuLX6FBft1aCQRVtxoYuS6mXDIHfI87HPo1AJzV2spXiN
Kxygutxa6RpixlwwX2lNYT7hkd+sDO1xWEdRx0J3TozHAwP9A1ztadixLIeT9BAXLoHCZdxDgUjRcWbGaLU5fnuSABIUFV5/Z/LA/vuABYTazVwitxxDlzZvBECFsvu4b65RFR1xi9o30JBQiNOXEa3IxYEcJTuZYTRBVslDJU40mwkOazrrPqfTlHXSKEFUNiky+FzolvPHquEePHm2yfthTTz3lF9psdYpRu8lRf9tAnU8IGfVQbBF5ASCEVOfIFqrylnwwn6BGCeGE3AmbIoapGklMeBYVBPv2nbrgQXbAioniRdIZIbyv0Foj+kFkP7FsaHrYVmyCx1cIxzRAgiaXD8MupU9yFIyYK/xoDXC4XGg+xWZhxHxtghgh3vI9lLjNdHcOFHkTbZXjabILjSDrGGTliEnj6hIsmZgZ+SUPaO4Q5w3pDCPn++WB1EN2iWKP2x71PfuAObg3t1p+J7soAhDhya9N8BeiZRZRm2yl26QYYhAhvJc1NNO0IsOel9y+ChF4kuPqxE2Vv3DEJWxjlyUe4uWxVgyRPUHSIFcd5G/L7XyWIyIseum28GX4eCI1olbNRscwrqwQYezX94HnGku7Or4AfrtCLKdcDSO+On3Jt7tfFNokQPe/QNdZ8LPb8TLg335lcFrN4Tkies4Tkp12Awx5Rzz/dbGVQe53wkdXtKLhQKjI/hwIvZZLtjyITRY39p9j4IYzHMz2vyF4nCao3OP/nT5ETNzVQJiqwyU6nFKLT3uQkInSFKPHQYEs8nqiDBaSI5t30DndJjenLDSCThGiBpSsCO2iS2Tj74jdbmf79u2NHjsxMbHePDeKIjStarv4OBoIPpPAa1r0tCmgNUH0buEjI1EoJ1Esx+P1MxnAkr+XPr1OUfBsvNc3yVpjeIIpDNHCXOKuCqCg5eviEfzz6BQqncLx0k+3gQ/CLie+19XCqduMeWV/qSgYWWYRJsUAnTD3SMBTTfh/2gLFrRFn5YgM9ezjTmyyr70zg17mie5ZSMM/FQOolmCvgB1/h5y5mJJu5Jajz7E+X3RGl3UXhTQb8tfJishzenG1CGUGEep7ZboYrEQH1Xl+S7Jh073sKJS5peInShQRtnxH0Es8apyNRlKEqUofCpY8ESZteo2PLfcCkK7dxhcpdxMZCJcfeIXtzsGESeWsHPsD0b1vqv+yWArg575CqAYlCRNbQBSnDUWGlZPFlAEgwqbLNgoz/pQTp17qqGKH8NtV7xf/h/cWVSzCewpzYu4PcOy/ULBYDM5qY+wECeMhYRzEj/W25e2NIpgjxABrboXwRga67cX/huABqNrvCxbQBgqVuJmOx+4Smkm5xddJewjQOOgWrW/UPFaXCqvQfECYtFzuwUnXqGZMW6eIogjnYo3d96ldVDFAK8IvmwxqaAxbiQijle2YFSO5chpWRQjiwqN7OW7oyc0Z4npbzZEvREgsQNwFIoS3YrsoYWI60uSuh53deLTmXbIdvozs6wPf4cmwRwka+n+tqgawuxiumQ+V7sgrgxZevagJ5/ZpwupQ2LD4Hzx39HJ2u/oDcOdAhdkjpFYP7GvscPMC2OhO1bmiJ7x8YfP3aVM+/GWRL4HRQ3iAEEBdI8XfzpHi3Zm9VMHslJBQ+Fvyx8zQ/A0sJxo8thLSjWesb/FhvggL7B4NU3sovLBGXNxTxnuZEfwfUbpmyPs+waIoooyLJ+Fz7K+i4z3dWIuFv8dSK4w8ZboIvW4LHNXCdJjztfhfZxQJsAW/+rQhD2E9IfUaoek1kmOkKGIahJTwpmu4tRf/O4LHaRKz6mn0oipBA1E3jWFzCrOHx1wmodA1CoL0LX/jFUX4BTwpNyAc8h1PR7mPJpAVMWqvsYvRa3ww6E7FP6HIIlHXko8sOyiSO1AsJ5B3dD93ZPfkvASFUakSoQHCxOf5G1brf1kR7al2a2k15Ueo2fIc1c4gqnUpKN3uJCPJyMAO7uKrjiqRKFmx3f3ZCU4TdhneKbu
ZN8pnec1/XfUHeCHyEQZHFwqb+UnMJ7K9UESduRTh4xjW+gLlbYPTTNmeL7h28/XsrRBa3D1DRDHSllJlg5u/h81uoXNVuvBRtXRwUGEV2fSLDrZs+3qC2nwCStdByTpwVELMcBHsYOyIosDzq2vlnbjpFm5lUfRg9NVuZ1BwRxj+hXDgH3hXmKYAetwLA19rWcPagqIsEZHmMXG1tdBTFFFQdMsDDWg2aULYpF4jTM/nWE2//x3BA6IGm6RpeRZ4HSwOkZ3u6TBbi8MlzCcuRfhDekSfulP6rMEjgKz5WJxasg85uSU7o/n9WoFGEtFQQ5NgcCIMSfKFL2/MEx2ip4y9QQt3Dxb5Hq0yITaC1SnOf1KaYRtTYhZamOdaH8hsuCZXbRQFfjoAz2T5fEfTe8G/xjcR+dgE5RYRrnvY/Tnk/hyrFCZlEKHY718Kma0Q1IoCL60RFZY9fD4FRia5S8t4AiokDXS/V9S1c5lFYuuEjc2XxGlrdj0vqjVHZIgot5PsW5qkZJ27CoMFOl4lhE103dpO5xb/W4LnLKDKJoIKYoOhBe6hcw9FBlsZe3ZtZvmWFfzXMpNyJYZq5eRUu2C9MEvaGvBTgcjqTouAFUd95tAhicIJ3e00mvnPNEUmIXwOuYMzZw+HuwY3vO2BMvjbClhTa/qqG/qKWS9PRug0hcMlphE4VikqR3cIbf0xPEmmr60TZsBXa09NdOxrWH+H0JY8aPRw0TqIGnDK7T8pyreCMbXZQBcVH6rgUTkt7N27h576bNjyENjLkBWJmpiLqR7wEdWaBKptUOU2r2k9Yd85HxJ69DVCpUpCukwlJPM1tBrhL9hRBBtOiOKHG/L8TZYewgzw2EhRgqatO9SzkcIaEXl31N0HPz4S7qhVu9RkF1nrH27xaSEdw+Dvo2Bc5/Zvb2uptonnot7AvuYorL5WzGkDImG1d/36eSpnL4319+dEdWqVsxlJ5JUkjIc116IpXk1Y6SLCfusrimAm1amKe2Ih5M4UT170EBj6L2/hJoNWlP8Y2AH+hPAJ7SvxCaE9JSIBcfYIkYj7v0J8CHx1pRA+x6vEtM06SUTbLTwAz/zmC9kP0MJdg+BPg1qX/X4madScHZIGF2YJs5vshG53tWezVE4j536ttrOMjRs3ctNNN9G1a1ckSeKJJ56ot01WVhZjxowhOjqa0NBQ0tPTmTVrFjU1vmJvkiR5PxqNhuTkZGbMmNGqGVxHjx6NJEk88MAD9dYtXLjQe/w2wdgRxq0UlXhBRMKtnCjqj3kcptWHYI07gi0gVoSONlF+XSOJUu83Z8AbE2HZjfDKRf9bQsdDYqgQPp7agH/PEomof17kEzpj02DpjXBf5rkjdJpFoxclgHr8pV0qY6u0D6rgaWNWr15NdnY2559/vt/kbB42btzI+PHjSUpKYs6cOcybN48ZM2awZs0ab2VlD48++ihr167l999/59lnn2XJkiVcd911rWqP0Whk3rx51LWozp07l5CQltbuaSEanZitc/QvQrAA7P4XLB0tytf/doWoHydpRDhq7Zp6Ks3SMQz+e6VI3gUxsRyIKhkfXgofX36aJktTUWlrlHZm6NChja7bs2dPO7bk9OByubzLqampyuOPP+63/vbbb1cGDRrU4L6yLHuXAeX999/3W//RRx8pkiQp1dXVLWrLqFGjlMsvv1wJDAxUVq9e7f3earUq4eHhyjXXXKOc6iPQ6D0znVCUX0cryhe4P5JvefdLp3TO/3UOlynK8I8UpdvrivLvNYpicZzpFqmoNExj/b2q8bQxmmYyTfPy8oiLi2twXXNmr5CQEBRFadX8OCEhIUycOJGvv/7a+93ixYsxGAyMHj26xcdpNcGJomJwn6fwq5+QMg16nkKpERU6RcLyG2HbLJHb84cxq6n8z3B2P7KOGt90B2eC8N4Nz6V+CvTv358XXniBF198kWuuuYaUlMar6sqyjNPpRJZlDhw4wIsvvsiYMWMaNOE
1xbRp03j44Yd59dVXkSSJuXPnMnXq1DafdroeGi30e1pUJNh0j6j8PfTDczov4WyhLXKWVFTOFGf341u5C5ZkNr/d6eKibIhpJmOvlTz00ENkZWUxe/ZsZs+eTUpKClOmTOGRRx4hKcl/SstZs2Yxa9Ys7/89e/ZkwYIFrT7npZdeym233caaNWsYOHAgP/74IwsWLODw4cPN79wWJIyFS1o/y6iKisofE9XU1s5ERESwatUqsrKyeOSRR0hISOD1118nIyODnJwcv22feOIJNmzYwPr16/nmm2/Q6XRMnTq1yekTGqK2uW3RokUEBQUxatSotrwsFRUVlRZzdms84b2F1nEmz38akCSJkSNHMnKkmERq2bJlTJgwgVdeeYXXXnvNu11qaiqDBomaY4MHD6Zr164MGDCAn376iSlTprTqnNOmTePBBx+koKCgfcxsKioqKo1wdgsefUibm7rORsaNG0dGRgb79u1rcrsePcTkKPv372/1OS699FJmzJjBvHnzWLp06Um1U0VFRaUtUE1t7UxDCaBWq5UTJ040Gu3mYc8eMddQx46tz38xGo3Mnj2badOmqWY2FRWVM8rZrfGcgxQXF7Nq1SoAzGYze/fuZd68eRiNRiZOnMjMmTMBuOqqq0hLS6OoqIg333yT0tJSv0ACgMOHD5OdnY2iKBw7doxnnnmGpKQkJk6ceFJte+qpp07t4lRUVFTaAFXwtDG7du1i2rRp3v/nz5/P/PnzSU1N5ejRo9x111189NFH/O1vf6OgoICoqCgGDBhAVlYWw4b5T7jy/PPP8/zzzwOQkJDA8OHDef7554mIiGjPS1JRUVFpU9Tq1CqnhHrPVFRUGqOx/l718aioqKiotCuqqe0cRZZlZFludL1Op95aFRWVsxNV4zlHmTFjBnq9vtGPioqKytmKOiw+R3n66ae5++67z3QzVFRUVFqNKnjOUdLS0khLSzvTzVBRUVFpNaqpTUVFRUWlXVEFj4qKiopKu6IKHhUVFRWVdkUVPCoqKioq7YoqeFRUVFRU2hVV8KioqKiotCuq4GljNm7cyE033UTXrl2RJIknnnii3jZZWVmMGTOG6OhoQkNDSU9PZ9asWdTU1Hi3kSTJ+9FoNCQnJzNjxowGp1VojNGjRyNJEg888EC9dQsXLvQeX0VFRaU9UQVPG7N69Wqys7M5//zzCQ8Pr7d+48aNjB8/nqSkJObMmcO8efOYMWMGa9asoaKiwm/bRx99lLVr1/L777/z7LPPsmTJEq677rpWtcdoNDJv3jzq1oKdO3cuISEhrb4+FRUVlVNGaQN+/PFHpXv37krXrl2V999/v8lthw4d2ui6PXv2tEVzzigul8u7nJqaqjz++ON+62+//XZl0KBBDe4ry7J3Gaj3W3700UeKJElKdXV1i9oyatQo5fLLL1cCAwOV1atXe7+3Wq1KeHi4cs011yin+gj8Ee6ZiorK6aGx/v6UNR6n08kDDzzA8uXL2bJlCy+99BKlpaWnLhHPUTSapn/SvLy8Rmcabc7sFRISgqIouFyuFrcnJCSEiRMn8vXXX3u/W7x4MQaDgdGjR7f4OCoqKiptxSmXzFm/fj29e/cmKSkJgIkTJ7JkyRKuvfbaU24cjhqo3HXqxzlZwnuDvm3NUf379+eFF17gxRdf5JprriElJaXRbWVZxul0IssyBw4c4MUXX2TMmDENmvCaYtq0aTz88MO8+uqrSJLE3LlzmTp1Klqt9lQvR0VFRaXVnLLgycvL8wodgKSkJE6cOHGqhxVU7oIlmW1zrJPhomyIGdqmh3zooYfIyspi9uzZzJ49m5SUFKZMmcIjjzzi9zsCzJo1y2867J49e7JgwYJWn/PSSy/ltttuY82aNQwcOJAff/yRBQsWcPjw4VO9HBUVFZVW0y7BBR988AGZmZlkZma2Kirrj0hERASrVq0iKyuLRx55hISEBF5//XUyMjLIycnx2/aJJ55gw4YNrF+/nm+++Qa
dTsfUqVNxOp2tOmdtc9uiRYsICgpi1KhRbXlZKioqKi3mlDWexMREPw3nxIkTDBkyxG+bmTNnMnPmTEBMhdpiwnsLreNMEd77tBxWkiRGjhzJyJEjAVi2bBkTJkzglVde4bXXXvNul5qayqBBgwAYPHgwXbt2ZcCAAfz0009MmTKlVeecNm0aDz74IAUFBaqZTUVF5YxyyoJnyJAh7Ny5kxMnThAeHs6iRYt48skn26Jtwr/Sxqaus5Fx48aRkZHBvn37mtyuR48eAOzfv7/V57j00kuZMWMG8+bNY+nSpSfVThUVFZW24JQFj06n4+WXX2bMmDHIsswjjzxCdHR0W7TtD0lRUVG9qDar1cqJEyfo06dPk/vu2bMHgI4dO7b6vEajkdmzZ7Nnzx7VzKaionJGaZOJ4C677DIuu+yytjjUOU9xcTGrVq0CwGw2s3fvXubNm4fRaGTixIlek+NVV11FWloaRUVFvPnmm5SWlvoFEgAcPnyY7OxsFEXh2LFjPPPMMyQlJTFx4sSTattTTz11ahenoqKi0gaoM5C2Mbt27WLatGne/+fPn8/8+fNJTU3l6NGj3HXXXXz00Uf87W9/o6CggKioKAYMGEBWVhbDhg3zO9bzzz/P888/D0BCQgLDhw/n+eefJyIioj0vSUVFRaVNkRSlTi2V00xmZibZ2Q0HDOzdu5eePXu2Z3NUThH1nqmoqDRGY/29WqtNRUVFRaVdUU1t5yiyLCPLcqPrdTr11qqoqJydqBrPOcqMGTPQ6/WNflRUVFTOVtRh8TnK008/zd13332mm6GioqLSalTBc46SlpZGWlramW6GioqKSqtRTW0qKioqKu2KKnhUVFRUVNoVVfCoqKioqLQrquBRUVFRUWlXVMGjoqKiotKuqIKnjdm4cSM33XQTXbt2RZIknnjiiXrbZGVlMWbMGKKjowkNDSU9PZ1Zs2ZRU1Pj3UaSJO9Ho9GQnJzMjBkzWj2Rns1m4/nnnyc9PZ3AwEDi4uK47rrrOHjw4Clfq4qKisrJoIZTtzGrV68mOzub888/n5KSknrrN27cyPjx45k+fToPPfQQOp2O7du3M2fOHCoqKggJCfFu++ijj3LZZZchyzL79+/niSeeICcnp8Xz6TidTiZPnsy6det49NFHGTZsGAUFBbz88ssMHjyYFStW0L9//7a6dBUVFZWWobQzQ4cObXTdnj172rElpweXy+VdTk1NVR5//HG/9bfffrsyaNCgBveVZdm7DCjvv/++3/qPPvpIkSRJqa6ublFbXnzxRQVQsrKy/L43m81Knz59lD59+vid82T4I9wzFRWV00Nj/b1qamtjNJqmf9K8vLx6E8F5kCSpyX1DQkJQFAWXy9WitrzxxhtMmDDBO8W2h6CgIB577DF27tzpnTtIRUVFpb04q01tJjvsLztz5+8eBUZD2x6zf//+vPDCC7z44otcc801pKSkNLqtLMs4nU5kWebAgQO8+OKLjBkzhvDw8GbPk5OTQ05ODg8//HCD6ydPngzAb7/9xujRo0/qWlRUVFROhrNa8Owvgylzz9z5F1wNAxLa9pgPPfQQWVlZzJ49m9mzZ5OSksKUKVN45JFHSEpK8tt21qxZfrOS9uzZkwULFrToPHl5eQCNCrbQ0FAiIiK826moqKi0F6qprZ2JiIhg1apVZGVl8cgjj5CQkMDrr79ORkYGOTk5fts+8cQTbNiwgfXr1/PNN9+g0+mYOnUqTqfzDLVeRUVF5dQ5qzWe7lFC6ziT5z8dSJLEyJEjvb6XZcuWMWHCBF555RVee+0173apqakMGjQIgMGDB9O1a1cGDBjATz/9xJQpU5o8R2JiIkA9YeahpqaGiooK73YqKioq7cVZLXiMhrY3dZ2NjBs3joyMDPbt29fkdj169ABg//79zR4zJSWFlJQUfv755wanT1i4cCFAvcADFRUVldONamprZxpKALVarZw4caLRaDcPe/bsAaBjx44tOtfdd9/
NokWLWLNmTb3zPffcc/Tt25dRo0a1sOUqKioqbcNZrfGcixQXF3tDlM1mM3v37mXevHkYjUYmTpzIzJkzAbjqqqtIS0ujqKiIN998k9LSUr9AAoDDhw+TnZ2NoigcO3aMZ555hqSkJCZOnNiittx///0sWbKEiRMn1ksgzcnJYcWKFc2GcKuoqKi0Oe2aTaT88RNIV6xYoQD1PqmpqYqiKMqiRYuUadOmKampqUpAQIDSoUMHZdKkScqaNWv8jlN3/4SEBGXq1KnKvn37WtUeq9WqPPfcc0rPnj2VgIAAJTY2Vrn22muVAwcOtMn1/hHumYqKyumhsf5eUhRFaU9Bl5mZSXZ2doPr9u7dS8+ePduzOSqniHrPVFRUGqOx/l718aioqKiotCuqj+ccRZZlZFludL1Op95aFRWVsxNV4zlHmTFjBnq9vtGPioqKytmKOiw+R3n66acbzM9RUVFROdv5Qwoep9OJJElotdoz3ZTTRlpaGmlpaThcoJWgmaLYKioqKmcNfzjB43A4KC4uRqPREBsb+4cWPhVWOF4JBi10jQKtKnxUVFTOAf5wXZXVagWE8726uvr0nMMJJ6ogtwpcjfn3FRfYy0Fu2dw5J9OG3CqR5GNzQd7puVQVFRWVNucPp/HUjvQym82EhIS0SYSXokC1HUrMUGP3fa/TQEJIAzuYT4C1CAwRENr1lM9fG5cMxypArpWBVW6F0ACICGzTU51TKAo4ZdD/cZVcFZU/BH84jafu7Jxms/nUjidDqRn2l8LRCn+hA0IQORpSahwV4q+9AmTHKbWhNooCx6uElgNC6Hk62hNVYG9jBcvqFELuSEUTx3bZoCgLin6Hil1gzgOnWTS2nXDJcPV8GPg+zF2by48//khJSYn/RgXL4Ps0OPB2u7VLRaW9sDjgh33wl+/LeCe7qj1fv1bzh9N4agueyMhIAgNPTgWwu4TAKbOAq9YN1GkgOgiC9aIzlhUoMkNSaO1G2MXHe7ByCGy6AGhLKTZDlU0sRwZBbLBoy+Fy0c7cKugUAadags0li3MVm4Q5D+CQA9IiIKhutPbmB+DAW/UPojGAIdL3iR8HfZ4EbRtP6wqszYV1J8TyE+vjuai6kM2b36R79+5kZmaSlpaGtOdlMB2DLY9Ap5tAZzypc8kKaNQSdypnAU4Z1hyHBfvgl4NgcgBE8cNRqJFlHhp+duoWf1jBExISQlBQ0Ekdo9IKOZW+DhcgUCc6+fBAX6cTESgc/GVmiAmCAM+v6azxP6CtrE0ET7UNCtyHDtIJYSdJEGIQbSt2mwFLzBB7cn0qiiIEW16NT5OTEL+FQxYCLjVCnNPL8XkNH0y2g7VQfABK1kLJahg5XwiiNuSrbTYgAAA7epaEXM+k6k/Yv38/+/fvp0OHBG4NX4cexP05/i10urHV53lxNbyzCZ4eBTdltOkltC01h2Hf69D1dgjvdaZbc8bJqxYDtD+CKVpRYEcRLNgLP+wX731tdDhwouf1DRrCAuGO885MO5vi7BSHp0B8fDxxcXEYjSfX81qdwpTlETphAdA5ErpFCQ2j9kg33ujrlItM4ruNGzdy062303XwFUgxg3niubdFR+eyeffLyspizJgxREdHExoaSnp6OrNmzaKmxiewJEnyfjQaDcnJydx06wxKi4vQakTn79eWECGMAApMQu0GsNlsPP/886SnpxMYGEhcXBzXXXcdBw8erHftNqcwJx6r9AmdUAN0j4bkMHGtLkVoehVW906yXfiyADKegzFLYMRcGPwOZDwP6Y9g6fRnvtU9xRr7aChcAUuGi46xjSgsq+aXQ+LHSI+0oZXALgWxPGoGtqAOANQU7UfvKvPtdPjjVp9nfym8vUn8Bn/Pgq0FbdL8tkd2wqrLYN9rsPEUcr32vQEL+0De4jZr2mlDaTjKx+yAp1bCsI9g7BxhNj6XWX4Exn0Gl/4XPtzqEzrhAXBdH4WvYy9jTVQKadoDAPzzN/jvzjPX3sb4w2k
8kiT5BRMoioLVasVsNhMVFdXkNAAuWWg6siI62c6RYjK6xgjQCWFUZhHO/VgjrF69muz1mzh/aAYlpZW+je1lENSBjRs3Mn78eKZPn85DDz2ETqdj+/btzJkzh4qKCkJCfJEKjz76KJdddhlOl8zqLft55bknyM/N4ddfl2Ko40DXSNAxHA6U+fxAaWFOJk+ezLp16+pNizB48GBWrFhB//79kZ12iq16isyS1y6s1yh0CFEID9QgSWDQKrgcNgqtAciKRI5HOLmsvkZ0ugmCk7z/muzw2Q54fweUWEBC5u2wq5hY9R0sHgoXfA+xw5u6nS1izppCnJII4Hh4sJUqKYD7F0OVM4CVMbfzt+7bcO6sYwosXAE1RyCkU4vP8+IaX0CHU4a//AI/XyuCOs4qDn8ElbvEcvEaYfZtpXlTlmXWbFjKQdtopmfdSPCEpRDZ7zQ0tg3Y+3+w/XEY9CZ0vtn79cY8eHAJHHW/hqUW+NNC+HZ6A+bis4XitSBpIGZovVU7CkX7Pf7dAC2M6wRX9IRRqRBQsR6W/Aha+Dz8Qq4y7aDAFsqjy8Uzekm3dr6WJvjDCZ662Gw2ysvLARFo0JgmpChCHbc6xf8dQpsWOh7ijULoKIowg/3lz3dx7w0jAEg77wrQuJ9wt+B57733yMjI4PPPP/ceY8KECTz88MPULRTeuXNnhg7NJLcawroOx+ZUePqB25AcNRBQP5QuUAcdQnzX8cy/XmXp0qVkZWX5zTR6+eWXM2TIEG688UZW/L6GEqseO0IgS0B0kIzBUYFsAQIjkWWFiooKHFYrsYGhlLpCcMoS+TVgsumQFQlNaGev0Km2wafb4IMt4rfx/sZouK9mLkma4fRjIywbC5kfQ9q1zf/QTfDzUWE/iZCqGNszHEkSbXhyJZyolnjxcH++HtgZDoALHRpcSChw+FPo93SLzrHhBPzqVtL6x8PWQjFIeWw5/OfiU/eptRmOatj+pO9/2QblmyEms0W7l1vgmz3w5TYHR6oWALDWPoZ3V10KE9ZDUPxpaPQpsu//wGmCg+9C55uxOuGVbHhvk89y0T1aaKy7S+CJFfDvC8+ie+ahdAMsHSkEz8RtEJ7uW2WGO34SQidQJ0y9l3QTFhkvJ370LnbUHuPznv9m+v6/U2aBe38Box5Gp7Xf5TTFKZna/vznPxMfH8+gQYPaqj2nhMvlwuV0oFTtB1MOKAoBAQHe2mXV1dWNFtYss/o6yfAAEUDQEvRa4d8B4RuxOmoFFUga0LpXOi3gtJCXl9foTKMNaWNlFtEZAERHhKAoii+AQlHAXumndUQHCfMYwIfvvMG4CyfUm95aow/izvsfY+fOnXy/bBN2RMdt1CukhdrRmIuw26xYrVYsFotfu1zWapKCrAS4Na4qVzD3Vn+BLWYslVZ4NRuGfwwvrfX9noMT4ZnRYoRmlfXcaskij26iU1xzHex89qQj4PadqOCIMxGAcR0qvZ3JTRnw8DCxfKgcbt5yFVVyGNroQUgJ48WKI580aqKpjaLAC6vFcngAzJkiRpkgbOzz9pxU008Pu//lM316KF7d5C6KIrSD+xfD0A+FeeZIla9H+8V+Jb+WZ8BvU/013LMB03EwHRHL5VvYnu9k8lfwrlvohBng1YtgyfUwoYvYbN4e+OosND+x52WR/yc74KhvYOqU4a5Fwu8K8K9xcG2fOkIH4MRPfv92c2Yx53Lhj3XIMGuhGECdDZyS4Lnuuuv4+eef26ot9TDZYUtByz9rj1pZva+ArfkutuSa2JJTxtZCiSOWcHaV6tlRrGXNEUv9/XJhxRHYVyqc5yXVZrYePIipMq/RtsmyjN1ux263E6G3o5FEx5lvEj2yggRIoK3lzbSX0b9/fxYvXsyLL75ITk5Ok9dvdcjkVDhx2O3kHNjFJ2++yJgxYwgPDxe9hekYVB+Aqn3eDlSShD+mKC+H/BM5ZI6djFP2BQ0cKRfXOWDUZAA2r/uNQMlMivYQHTT5VFe
UeIWz0WgkKCgISZKIiIjwVoEwVZWTEuogWCe2+8F2LVP2vcDwj+G1db6ou2HJCnMutfPNVUIQzO4vOsQSWxDXm9ZTHeDOb9r+JGTf6h8J2EI+WVuOIonH+NbMCL91fx4MswaK5Z2WrtxW9SOW8EzofKv40nQMilY1e46lR2Bjvli+a7AIMHl2DKSFi+/+tlIItzOO6TjsfVksd7gYIvqK5UYET5UNPtsOE7+EK7+Bb/f6zDgDQ/bzUuitJGiFI+vJmjepKdoO6+5o1zD5ZnHfP7ui55WqvzLlGy0H3K68C1JgyQ0wNV28F/++UER8Ajy1CrbV9tG5bFB98LReW7lFBAR4/MF+mI77B+kc+6+3Lc/9Dtm54uvbBsCUhqa/Mh2Him1iWR8m/lbvp288fHSZe9DnhFt/gJ1FDezfzpySqW3EiBEcPXq0jZpSn/1lMGVua/Ywuj91MQCxrThOMNCVBRfvYUBoHGj8f6aqqiq/QACAQCkEsyYMkyuQam0owRq3zU7SigfBUQX2Mh568EGysrKYPXs2s2fPJiUlhSlTpvDII4+QlJTkd8y/3DUL7vJNh92zZ0++/fZb7HYbGvMxdK4qsUJ2gMuKXdZhs9nQaDQ4yo4CEJeYwpFyGacs4ZB9mktISChh4RHYinbRTbsbCTDZbUCEV9DUjgrUaDRERUVRUlKCoihUVZTRKUxLviTeot1VUd5tR6XC3YNk8jb+wO7F+Qy66SaMRiM9pQOcZ9nJ5qCxHDZHcI3rVxYkXY6+ejsc+RRMR2HwW62Kwvo1T7xkHTWF9O3obwaSJHh0BFSZzHy1N5j1jgv40+HOvD8gGoM+HByVcOhjiB/T6PGdMvzL3W93CIFb3JFsIQZ4fSJM/Vo4sP+yCL6bXiuy8Uyw7XGhkUgaGPBv2P8GVOygsmgnB/IUDpZJHCiDg2VwsFzkfdXuZo16ocld3xd6rZkApqOEJwzgjgP3kC935GXzMzx19H6I6A29Zre+fYosUgusxWArxmUuoOTEXipL8zD2vJ6k7ue3+pB5x7exznod71keYrdzgPc6nhgptILaRoSwAHj3Erh8LliccOfP8NO1EBUErJoMBUth+JenbPqti80Jn2yDN9ZDlR0iA+Hli4R/xsuBN4W246HmMJRu4LuSIXy4RXyVmQw3dzrBokXbGTZsGBEREbV+iIW+5a53wJ5/gyUPHNUMTQrl7UuEqa7aDjctgG+mQZe2DSxtFe0S1fbBBx+QmZlJZmYmRUVngbhtDY7Kel9pGqjIGaSY0CAenEI5CZemlqZjcHfKLhsRoQZWrVpFVlYWjzzyCAkJCbz++utkZGTU04Buv/8JvvxlAz8uyeLDDz8E4PLLL8NWuscndDy4zNjtdqqrq6msrMRp9rXb4tR4hY5Wkok3Qs8YEZCgx4nn3QzQ2DAY9MTGxjYYiq7X64W2hTBrOs2lxGgKuTn0Y/QahXGd4Pur4ZPLZPI3/ci2bdsoKipi+fLlgBiovHpNF3qxG4CdtjSmHXyPmvBR4gRFq2Bhb1g6Co5+6RcJ2BBrDpRTrEQDcHFqw4nCkgT/7L2CyQFiBLOyOJmrPi2l0DhBbHB8nhgUNML8PXhH0A9kCvu6h37x8Ihw57Gr2Cegzghlm+DoZ2K5y0z2OHpz08EnGVSST78T+7nyG4nZy4TfbeUxX7klgF4x8NxYWD8T/jkWehlzxSAAGNvZ4DVRfWL5C9sdA2Hro5D7ffNtqj4EG/4sIuO+jYf/GmB+DCxMh6UXoF0znfhjf6N7zTvw+9UsWbIEp9PZ6OEURURdzt0FDyyBER/DsC0vcV/1F16hkxm2n1+uh+v6+oROTU0N8+fPZ/ny5XSPVnhhnPj+RLXwfbis5ULoABz9ouW/eTMoikjoHPuZ0Fqq3Ap9uRVm/CBMmnYXbv/Ue2JlwnivlWTnrt/46zLxdWIIvDbezryv/8uWLVvIy6tjjfGY2cJ7CW3XQ7WIbhvXSZgcJUSQxQ3fNqJ5tRP
Njs/69+/f4MOwZMkSEhMTW3SSmTNnMnPmTEBMhdpSukfBgqtbvDmlpaUES1UEaevYoQ1REBRPdXU1Nnctt4jISGocOkrc/pOwAIjTFoGt1Hf+cAsOcwk1Zg2Rkb7hQXBwME6n02uG8qCzOCi0ajErIZi1tcaShggwS+JJtJUhGY2MHDnS63tZtmwZEyZM4JVXXuG1117zRk4lJqfSLyODCLkYqXcXOiYnc9GECSxbupQpk0ZjJwiDxiHCZ51mFCUASZJQFIWEhAQACnOF/Vuv2AhUTEQZdYSHhFFTU0NFRQWJCTG+9ksuosNDkJooMRQcHIzdbsdsNqNVzEgo/KPnMp7KvBWtRkQR/vDDj2zduhWAlJQUJkyY4N0/LS2Vr2+3MeWzUg5ao9miGcrM7Q/xTNdIujoXIikOdxWELAi4FzrfAl3ugLD6ITmfbqgBItEoTm4Z0bhGqy3fxKuhz1KjhLPSfjHbLMncl/cUnwfNQ+uywLGvoevMevt5nNQgnNNXptfbhJkD4PccWHVMhLeOSKkzkm0PFAU2PyiWdSGYej7DjG8gr6b++xkVJFIDukaKwrIDOwgB6udeLPrdu/jed7u57sK9rD7ekxq7ltmmj/kxfAC6NdfDhashsoFkpvKtsPtfKMe+5lvr9fxmn02MppAkTQ6J2hwSNTkkaXOIlEq9500KzOPrDb9w8OAhxkyaCsZ4is2icywyiaCAdSca7yzjNHncFfQCNyesRRO+wft9ZWUlc+bMoby8nL59++JwOJjS08DmAhEAk5UDr2WV8qD32lcJC4InKMiNyS7OH2qA9Ng6eWwNsOEEPPubCELxMCABrkoXftASC7y3GTbkwes9v6Oj3W2r7f0E7H+dsmMrmbVjOlaXMJO9NxkObl9HTU0NBoPBOwAERJWQQo+Emgyh3X3rqvZDlEjkuayH0HgeWy78RZ9vhweGNX0dp4tmBY+nAzkTGA3iZrUERVHIl21E6M0YNA5c2hD0GgWNoxIwQVgQrlgjRUU1InrMYKJUH050MARoZVIDSwm05UAoOKUgXIqOAGRkuQaLLYSQkBBvkIJGo/FXc93Euoops0o4CKDQJkx+DocDlyKh1YeL8jn2cghO9nvTx40bR0ZGBvv27QNq5cgAwUo1Wo0Gg15LZm8xut9/MAdFH44+tIuwS8tV4DITGpZCaGgoiqIQHx9PSkoKm7IW8ej9f0KLjKIEeq9h4UKhmo/MHICM1qutSc4q0DUdWREWFobLYUEniX0ckcPQe4XOD95npmPHjlx33XUYDP5vaWhwAPNuCuCSz+2cMBvIDprEi/srGJ88lqsHW0R0Us1hsJUIk8Gef4uqB93+BMmXg0aPU4b1VSJHp1dgPsnRHRtvcNkmDJKDd9Oe4+LD/TniTGCNqRezNd/wYsBVaA5/3KDg+WSrL2F39vCGq39rJHj5QuEnKTbDQ7/CL9eJvKp248QPPl9Vr7/y8pY4ryP6auOX9JdW0jW1G11HPCzMSs1RLASPyRlMiT2KkqM7eWhYT55eBbsdffnI+iB3SC+KXCFPpJuiiMHC7hcg/xdcioana/6POdbG84gMkpOEICtJjvWYlFByjJ2otEfyr++bL7aXHApDI44wpPxZhuqzSOuSiXTsc6jUC01ZG0BpaSlz5syhqkpotL179/Y+i0+MFEmYm/PhP3u60j9sEuMCfhY5d6UbIHY4NqcYUPywX0Q0WmuNv9PCoXcc9I71fWKNwof6wmr45ZBv25Rw+OsImNRVvPYXdRGa1ppc4WOeVHQZL4ZcwcSEIxB3AU5LKXfv+BO5LvFMvzAOuoSa+c9qoVInJib6D/oLlvmCPpImi+hSbTC4zFC9z+93u76v8DOtz4PsMxho8IcJp/ZEejnRc8jZC8UpUjsNkh0DNgwVDgxBElJgOLgclCthKIi5bKKkcvS24+I4ioYSWxh6jYMAPWhQCAlomcNR46omXmMiV07D5tLgdMlYrVbsdjtBhiiwV1BUWEB
cSCfQ+2rsWK1WTpw4QZ8+fUQJHveoTivJxIToCTAEYbAeY/M+ET7VMbUzUmgXYcvXGYWpyFMbzZ10qtVqufvuu3nkkUfYvmUTw4cP9zvfc889R99e3Rk14jwkQ5h4cF0WcazApkNmNRoNESE6cFu39EnjURSFH3/80U/oXH/99QQENJzkEhkEn19lYMpchUqbhhXGaVw7oBR6JUL6Q+JlOviOMOkoLjGiK1wmBNC4paw+DmU28fj+ZXxy0zembBMAgdG9udu+mRcP9qZQl8o31VMJcLzJs9yFVLUfwnwjxQorvLlRLA9ObFqLiTXCKxfBjQtEFOJ9i+HzK9ppmgrZIUoAAQQnsz3qQT4Wlk0u6gz/Cv8OKXce2HpD0MMNH0KW2b17N7GxscTHx3sFT441BZAoKSnh9qmiw9paCK9YnmWiYS4dzcfgtysg/WHY85KoTAFYlQDuq/6KRbYrAAiUa9BqJEx1/K92RUeOOYQcxoovGglv1mtE5z0kyf1JFAE0rP8XWD8SVUE63QDHPhe/R8V2Cp0pfPbZZ5hM4mUaP3483bv77q9BC29NhEu+Eqan+6o/Z6FuIEmaY6zdfYgfHMNZdNAXKFOXo5Xis/CA77vYYGFGc7oDJcMD4N6hcENff99fnFE8H29uhFfXKlTJYfyp6ltujtrDYy6Jl49dymqHGCDemrCcqeljWbz4N2w2m/daPJaW48ePk3TiR+EzMURCzDDRL4R2E8EGVfvrtX1YshA8WwqEMA08A1LglE55yy23sHjxYkpLS0lOTubVV19l2rRpbdW2VuERPBYl2B1RBiBhVwKwEyAM2iYQgQM+ksMUjOZ8tIqMAlQ4I0GjR9YYUChHQiEsUIGWTCftqMFZVsiK1dk4ZR0Wi5WDBw8yb948YqIjmZiZyMz7ngVtAFddfSNpaWkUFRXx5ptvUlpayqxZsyi3+IpxVuQfZdeObSimHI4dPcozL39AUmICE6+4WTxc4AvXVmQRnlwriu7+++9nyZIlTJw4sV4CaU5ODisWvCkeYF2wqKvmsog8EEX2Hb8RtLJb6khalJBu/Pjjj2zZIrygycnJTQodD50j4b3JEjd8Bw45gCc2JrKgC8QZNdDhQvEx54mEyIPvgfm4ED6mY8zfkwqIEihjOzWRkGEpBIt7aBc1kAR7GBdt/ZLFITdSpEvmc+ud6CU7Tx36BGnAc97d3tro63T+OqL5nI8LUuFPA0U5nTW54u+fBze9T5tw4B2oFp2Ls+/zzF4ZiKwIB/vfR4N0YgTkzhMJpfZyv1JFHoGTlZVFcXExPXr04JqpE6FiOwD6xDGQL0zYGknh+XESk78Ci0vPk/zCx0o6UslaEWbtplKXyu3mlayzpQEQ7zzGeNN/uWbKRHr06kdBjfCt5FX7/hbm7SLUvJ1YXSnGbjch1xQzrG8XYoNFJx0eoCBJDaQbeLS8uFEQ7UvpqDi8lE+Wa7xTpEyaNInBg303w2azERAQQIdQeH2CixsWQJUSyXUVS7ESRHFJB7/TxATBJd3h0m4gA7uKhE9vV7Hw/3kEjaeKgEELN2fAX9wRkA2h1cA9Q2Bo4UPcc/R+CuRkPj2azsrP4Vil6GuG6lfxuPY6Ksp2sGGDMB/26tWLpKQknE4nixcvZuPGDfy153eiWFSHib5AqLDuQvBU+wueyspKhiSGAhrsLiF8hjUzbjsdnJLg+eSTT9qoGaeOxw/lcl+SRpKJN4of12Y3Y3dpcBBQSyhBTDCEyydAcaupgQlERSX5HvDqamEec1SAktx07+Oyg2xn997D3Hfbn7xf//zzz/z888+kpqZydOdy7ppxFR99+RN/+9vfKCgoICoqigEDBpCVlcXQzGHsq1VQ+ZWXnueVl54HICEumuGZg3n+xdeIqOVv8it06TT7CR6dTsdPP/3EK6+8wqeffsrTTz9NWFgY48eP58s5H9I11t2zaoNBi6ippsjC3OAJyWwMh3sCIG0QDqeT4uJiQAidG264oVm
h4yEzGZ4fJ0xUJ6qF0/XrqyBIp4j7EJwIfZ6AtOvhh84AVOcsZ/GhWwCJS7tTr4qDH+WbfctRA4mw6tFj50LT52zp/BC7S3V8bLkXw5a3ebSfC0mrJa9amNlA5H4MapkrkweHidD8bYXCNzS+E/SIaX6/k8ZeATv/LpajBvJhxXXsFreBh4dDYigQU6syRPFaSJqEzWZj27ZtbNiwwa+Cd1FREfb8LAzusAMp7nzgAE6nk8rKSnrFRjDzPJEjs6KsJwtT/8Vkszu6LSiJgk5/46Ydt7GvUtyQodEV9Dj0OTqcdO3alQCdKPWUGlHnOvKOw8rrxHLPPhA/2m/1xo2b2LVrF4MGDaJ79+7CXGwphKq9YoO4URAQDSGdoeYwx7bMx2q9FEmSuPzyy8nIEH4oq9XK6tWrWb9+Pddffz0pKSmMCNvBI8aveMH0L47Lnb3nDDUoXNxF4vIeMKyjKA7sYWit4FOrEw6U+gSRXisiH1NquWAapWIXQ6tfYVHkpzzAWlaUduOYOyaoQ5CFN4Ono7cXsXvF67hcWiRJYuxYoR1qNBoKCwtJCCggwOW+h0mX+o7t8fNU7fNaQoqLi/n000/p2Kk7Bs2l2GWJ7NxzUPCcTXjMS3bEQ6+XXMQa3U+LbICq3cguBw4pCHtwNxRJT6hUAdXuYH59KFJwkr9w8fhlXDZhimrK9+EuDDr6/IHIDjMHK4OwOEFCIUqqJCkuAuwVXDxuOBePGy5UYYP/01liFoleAJVmB2G2vT7bbWAcBHesL/w0BhGyrbiETZcov9UBAQE8+uijPProo/772Uqg5qhY1gWBpBPHVhRhbmtK8LjDtwHQBmIwGLjhhhtYtmwZ48aNa7HQ8TCtl4hWemODsLvf8FUFlyo/cvON1/siCEM6QUhXqDnIjzvKsTrF7zAx1UxdLdYPt5kNjQHCexNpET1zgGLjX0PymL0mjN2VEbxbfSeGZQd56KKuvJot8lk0kujAG6K0tBRZlomN9QU1GLTw+sVw4edi/8dXCCHa2krWv+fA7GWiQ3h2TMOmEFmWUbb/A607GCan25u88ov4rfrHw02e6jZRA4RW7LJQc2wJK7fIbN++HYfDN1VHZGQkI0eOpF+/fmh3PiW+1AZhTBaCB6CkpISIiAjuGyrMS7lV8HTxQ4wcrCc8LIoDoddy0w8Gr2/p+j4wyrWB7EMiCCc4uIl7FDdKDJhcVshb5Cd4ysrKWLJkCQ6Hg6NHj2IwGOjZsyeZiTl0qL0/IEcORFNzmAT9cbRaLVdeeSXp6b6IEEmS2LhxI3a7naysLG644QYoXsOfgl7kiKsbi1w3cb70PZcFfsWY8XcS2PFC776KonDkyBFCQkL8EsADddA3Xnxazb7/AyBKV81Hl4fzwR4RGWnQwruTdcSucYIdgot/AK7gvPPOIzpa+Hk1Gg2XXXYZu+Z+Ka5d0UDChb4w5bAe4q+zGqyFFFVrmDNnDiaTib07t9Cj80XsKAs8Y36eP0yR0ODgYOLj4nCK+sPoNbUy0jU6MHZCg0KAYibUfoQwnQ3J0/FqdKJjq9upGyJ8yw2EVfvhqUit0SFpA0kKE0JHQaJcCcXmVERnLrmH5/Yyv91l2efbCdbJGM27fJ17UELDQgfEdzr3S+1sRXyk0+JtL5Le7S9y+52aCC8W62vlMLlNfQEBAUyaNKnVQsfDQ8N81QA2lUfw38J0Vq9e479Rh4sA+KFEROmEy6UMTGzGEe0RPBF9QWvwi06UzWV8flUw3XXCd/b6nq48usxXiWBaLxEBVpeDBw/y/vvv89VXX2GxWPzWpUYIEwqIiKVvdjfdvLqUmOGeX0TH/s1uuPE7qGzAz1CVtw1l738AOCEN5s9Z3bE6QacR4cJe/5JGD9GiQVWHFrFp0yav0OnQoQOXX345f/7znxkwYIBIEHb7d4geSnSsz+RUWioEXLAe/ulOeyo2a/hX6f1sDLyZK+f7hM4DmSIsu6xUjMRjYppR+3RBEDdaLOc
v8lsVFhbG8OHDCQsTAyG73c727dvJ3STCnu1SKMcqjCiKgiZGXGesoZjrpl/uJ3RAPKNDh4oaaIcOHSI3NxdK1iJJ8GKHf7FjFrwddQsTA74jsORXv31//fVXPvvsM95++23eeustsrKyvL/JSWEt8YW/p12PJiiOO86DdbdB1s2QkaiHlKsA6GncS6ABRo0a5XeImJgYzosXYdU5lo6s2VirhEatyLayY9l8+umnXn/XZZddxqjOIshic57iFzTRXvxhBA8AiguHV/DUCQjQh4oOHETHWrUHFPcvHtJZjIjrotH7TFmeid0awyN4dEaQJIL1kBBkBxRktBypAKei8dnY7eV+5VpKLD5bcbx0DC2ic7BqoutFwYEY8TqdTvFRDOKvtQqnw9FkLoQXl9sgrQ32HVvv1sCc5qYnr3O6zWyStuHf7SSQJHhxvKh2ALA3YBD/t9buNeEB0GECea5ksh0XAJAZnk9gYDOCrsxtaosSJQyCgoK8kU3l5eVEhxj4YsAXdNEKs82XO0Uh0AAt3F+nTqPL5UJ2WLAVbcFms1JeXs68efPqlWG6Y6AIVQaRv1HmL5saRVHgr8uEs9vD+jy44isHX/+c5Y3OAog4+gI6jQuXouGfOXexvUo8V73Mq1n639eZP3++N0qSWJFslKA7hl4LGRkZzJw5k9tvv53+/ft7K1LgskPpOvc+5/vlbdU2yY1Og8vc/doXO+G6b4Vw1EgiAuveoeJ+evZpVvAAJE4Ufyt2gDnX+7VOp2P06NHcd9993HLLLQwcOJCgoCBSg44CcKg6iW+/WyA2jhJ+Ho2k0Dmy4cHT0KFDvYOjrKwsKHEPbmKHi0KqceLZ8ub1ALt27WLt2rXe/4uLi1mxYgVvvPEG7733HqtXr260FFejHHrPN7DscZ/365jgWlOapF4DQKDWxrWjIggNDfU/hqWAULsQNvtN3Vm5cqXvfakVKLNh2efeCTEvv/xyBgwYQNl2IeDtssS2QtqdP5jgceJUhODRNXRlQYk+c5ns9H3XlFnJo/U4anz71EV2ic4aQOeLow0PlDDK4gWwuyQxXbUh2t1W2atFeSZdAwiWTIQoYiRV5QxDDmhYh58xYwZ6vV58IjqjTxiGPn4IeoPBGzLdKIpSq721TCC1f4emtB6P4NGHNr7NSSBMDBJdwsXvvCFgLM/O3+l7qeNH853tRhT3Y3tNRjNCx1oCZndSrlvwSJLEJZdcwjXXXMN55wnNKS79Kr4MH0eqxjdVxIz+olCsF9lF8bqXqP6qA72PTmdKd+G0PXz4MEuWLKl3HR6toMIqhE9L+Hq3rxDptB42xseK0eyhSj1P78tg8QZ3jG7xWsj5GoBt2umsCJwOQKirjP7WVZSVlbFz506WL18uUgdihODRKDYeuHUsU6ZMISkpqb6zvnyzrzOMFVUE0tPT6d+/P6mpqX6b/u0CX60wT+HK9yeLagEgfK6e4rwtEjwdJvqW836pt1qSJFJTU5k8eTIP3n0zcQGig821d6F3797iWqLOwxsaV7qh3jFADDyGDBGaUd7hzb7pOWLcCS2eOn7lW705fR4BGhoaysSJE+nUyRfimJ+fz44dO/ySyu12e72ZkP1w2WH/m2I5fmzjVb9jLwD3tB4p8pr66/N85coOWdNxuVx8//334n0xROLSi74mFFHvacqUKfTv3x+AEZ0D0bgrJfx+tP1Vnj+Ej0dRFCwWCzrJjhMxQtM3ZFiXNGDs5NZ2FLcW1KH+drXRhwNuQ6ijUjgx6+KqZeKqJXh0Oh1BigmXosMqGTE54IQlhGSNHkl2iAniDJGUWITwAUjQ5CIBlc5wTC4jMY0kcz799NPcfbc7R8Jp9b1AxuTmAwNku688h7aW30obKLQ82dH4tcpOn5lOFwJUNH2uVhIeAJ9N1XHJ5zbKHQH8YL2Avkt2M/PiPii6MObZZgCQoV3HyH7nNX0wj5kNINK3bb9+dV70yP4kRMfyFWO5w7QYXUQ6d3qCoBR
FlCPZ+lcSKnd535i+kYfZmjaBo0ePsm7dOuLj4xkwYID3kJnJIllw3h5hMpveS4QCN0ZOJfzdHaSVbLQTvP5lQhQH5wWMZHPQWEyacJ7a05vO6QoDd9wvNtSH82Xox5gLRUDJPy5wkOQaT35+Pnl5eZSXl1NUVER8rC9LMLB6EzCSBvGY2SQNuPepnfxbm1gjPDkSHl4qIgs/ukwko3qw2Wx07dqVkpISPz9Yo4R1g5AuUHMI8n9pMK/Kg7bUVyJi9NVP4wzpLf7RhwnfRtVeKNvY6P6ZmZlkZ2fT0eDTrLxBGAnusgYoYuqMlKsYNWoUcXFxhISE0LFjR4YMGUJ1dTW7d+9m165dfmHaANnZ2fz2228kJiaSnJxMx44d6dixo68yfs43opwN+Gk7tZFlGY1GCynThS/oxI9i8KuvlSDmqVYQ0pV+I6axdNkyTpw4wdq1a+ncuTOumlCSA0qJ1pdyxRVX+D33/Xv3IGbnCYp0Kaw6ZOXBEe2ZePYHETwul4uKigp0GidIEQDotI14dHXBwkntrIbAhObjZLVBwpwk20WgQUOdscfnIUl+UWYajYbAwECCNQ5KZRcmh5Zyq4TBkEY8B8BRgdPlosSkASSMUhUhUjU2fQImmxhB6RoRPGlpaaSlpYl/FAXKg4UWFdTBb06cBnHVsuVoa2k8kiQEra1EaDzuaBg/as+uqg+lrQUPQFIYfH6lniv+a8cuGfjX3q7061ZBQHAoh52isOjVQR+hV3oCTYQPeSLaNHpfwczG6HwrSRX3sTCsF1y8AwL6iLlsts72dciAQ9ah1zjRVGxl2pWX8/6Hn1JRUcFPP/1ETEwMHTv6ElkfO19oMJU2EWiw8NqGI/BcsqgMbXKIvLIpASsxK8LUeVn8MYbGHeXdQ6mYXAau+9bJW8YYxgXAmsT3+WajEDpXpsPUQfFAI17u8N4ipLp4NfS8r+FtPNcZ0a/5wQswvTekx4joueg6sQNGo5Hrrruu2WP4kThR1Jcr+LXB6gFeCt0S2hCJPuY89LVD/6MGCcHTiMYDwh88ePBgjPtFErWsDUET7hZeEf0gIEa8AwXLvH6Wuv6i0NBQhg4dytChQ+tNZ3L8+HGcTic5OTl+ZbAiIyPpmJzMxZoXCALRDyVdUq99sizz3nvv0aVLF0b2uoLAff8n3tkTP0Ca+zd12aDArWknTWbYgOHs2bsXs9lMcnIyWq2WEmcMyQFHSY22EVhnsJWUlESqdj1FpLCrPAibs33rDJ51pra6N7EleNRaudbl6JvK3jOEC7+JpgW/tCTVMrdVNlxG39MZa4318l+ioqKIiIggLULrnSG00B5OuRIFikJpVQ0uRXTuCZo8CEnDqggbj0ajabAuXINt9AYYNFyzzL+9nhwcyb96Nvg6HNnpL6A8eMKoJQ2K5uSmFm8JfeI1/Hu0GUmRsUuBzFyo49XfRLsDsIraa4XLmz6IR+MJ7wPaZsxyadf7Orpd/4SsK+DXEb7OOCiJ3xy38E2BO09NthFsP8Q111yDXq9HlmXmzp1LZaUvCCU6GB51173cX4q32GNd3tvsq35992AZx3ExWh86dCi33norf70kjfcnSwRoFawuHbdXLeAz+UkeO3AlIIpOPtGIEuPF7eehZHXDFZgVxXetsS0v1tk3vr7QOWk85jZHlTcZtUE8+TuxI+vnm0W7VdXq/WLKkEYYPnw4HYOExlOsdAGNe0QgabxFY10tnHnVa7Lc9zqsuYGLuhxn/IjedOrUya9qR3l5OeUHFhJkds/J0OPeBvPltmzZQmFhIWvWrGFfcTgY08SKY//1bVS40hdMlDQZjUbDtGnTuPPOO0lNTSUuLo5OGaJmW6A9t56bQJIkhncU53aiZXNe+5rbzirBo9frvUlfrcEjeJRaN1HXlmnjHqe7J8elNori+07fuLqq1UBahMjCBsh1pVGpRFBiFxpSiFSJMTQOAmK8UUfN+mr8TuB++10tEDwNBRZ48PPzNPDieoM
oQrDabK1rYyu5PCOC21JFOG+lEsKKfCGQxxgWEq6phPwlTe3uEzxu/46H/Px8PvzwQ/7973/7nOaBMb48iGP/hdwFYlkfAf1fgEv3s7YwnVxLrdI8JcLEdsUVIkPfZDLVKzF1dW+fCeq1dWJm2NrsKoaX3X1sRjxcGnfMe/+7dfPVpxvfGb487wvCpTJc6Hii9B8cqRQP05MX0HwpHI8pyZLvLQDqR9VeX53CWoLH5XKxZs0afvzxRw4fbrvpyhskfjRo3AOEvEUNb2Mr8ya4esKo/YjyJZL65XDVwRioIzFQSPvIbpP81pXo+gOgNR/h2O7fWtJy8axtugeOfkHs4UcZUTSNm+LeYPYldv583TAumTSJfv36MSJGDCoUfbioQ+jGEyHncDhYuXKluLy4OPr26+cNMiD/F3H9AHluM5s+TAhgIDw83O99DOngNi8rzgbv+aQBCUhuk/vPO8rqrT+dnFWCJzY2ltzcXCwWS6s0H5/G47Nj6FuiKbQUfahvZGKvqHNyi08L0jVtJ9VrhfDRSGI2zmOurt6E14QQDQSIUChPVFpjZrYG8Wg8skOYBZvCK3ga6K00ulqRfHV6SdkFThOKAhZXILm5uS2z358Cj13WlUlRB/y+G2tcLxbyFzc+f4qt1PeyRfn7grRaLbm5uZhMJq8DHPDN0wOiA0x/GC47BL1mY7IJP6JFDsamd2fclYp2pKenM2bMGC688EIuuOACv3NpJFH5WSuJZMO/rfA12eoU5XUcsnDOvzoB7Fazty6gn0PfWsSg3LuYHzGCRJ0vDGlER5ja0PwsdfFoPNDw/Dy1zIm1t9VoNKxatYrNmzdz7NixFpxIsH79erZv305ZWSs6NF2wL4enMcFT/BveutrxDQieyP6+lIUmzG2Ub/FGjho6+I5TVVXFd2t8z320c2uLms6ef9f/rmwDmp1PE7NhAoNyp3BFh5/oHiy0HanLTO9ANTc3lzfeeINPP/2Un376yTvlyvjx44XFwyN4ZAfkficeII9/p8OExqc19ysWuq/e6h6dkolHPEvZx9t3jqWzysfjidXPy8vzS3BrDpPJhN1uR9bqqFHK0SBjKG9jmWqtFqN9qQyMtbQKR6WwBwMYg0Aq8NvNM2Gcy+UiMDAQrVaLwwnFJl9Z+iCti5wQ9wRyiphmGoQtusV5MbIdzO4OKX+nf7RabRRZTBoFECCDvgEN014uPhSB0eUTuk6zu7qBgj5US3xiF+89O11otVreuqEbD/0qHPUdQhQuHdQVtiJmnqw5BKFd6+9YXsuuFemv8dQu8OoneBIvEdqNrQy63w1Gn3ZTO5zYGTGQgOJcr+AB6gmc2vSMwZvxv/woLD4EF3eFf68VJjiAx893z48S2ZtevXoJn2Xtgcf2p8BZTTfdXr69NJ/7N8RTZhHhyy2awjmki0hCthYJwdPpBv/1HsFjTBNmaDeSJBEdHU1+fr7fb9AUiqKwdOlSHA4HF1xwAWPGjGnRfoAwt+UvFuVezHmickVtPP4dfRhE9K+/vy5Y+LMqtjcZYECxJ0pMghgRN+90Ovnmm2/Iqw6kIjqcCH0lITXrgL803eaaoyJgAKD7PdD9LiEYTvwkBKXiEsEEhz8SMXeSBnr4jukph1N7brPU1FS6dnU/1xH9IKyn0EqPfgXRmb5BVeLkxtsV2lVcH4q7dI6/P0mSJPpFmimogMPWKMw2J8Ht5Og5qwQPCOHT2s7ss88+4/DhwxTEn8fPtsF00R9m+V2dm9+xNRzOFrNkAkzaKSbDAlh9rTDNhHaHS+uPKqqqqnj11VcBmDp1Kr17i/0+2y7mftdKCt9fI9HTlwyN3W6npKSEsLAwQkJaGG0iO+DrIaJeW99/QM8nG96uKAu2uB/WC1dDbAPD5eLV8Kt7m1E/+Ryg2x6H/c8Jv9BVlY2PtNoYT47PhZ0hPUYi2DBRCB4Q5raGBI/HzCZp64WrGgwGjEYjJpPJK+S9J2pkgrPana4+YQQUfy86Ant
lvQoUIDpfRVG8Prr7hsJP+0VZoKdXiXD/D9yWoFGpcGOtJkqS5JfoSsUukfcBkHYDHVL689+UhmM/GkWSRFh17ne+3JXaNOHfiYmJIT8/v8UJk1VVVd6BY4tCqWuTeDF4LGT5v0CXGf7ra/t3NA1EaoAwt1Vsb1rj8fwG4b3BEEFZWRlvv/2229ogYQ4bToRlkQgwaO6H3vd/QrhIGuh5P4Skiei69AeFhSR/sYhKy1skEse7zASjT5u96KKLiImJYePGjVRVVSFJkl8hUCQJUq+FHU9B0Qo49IF7T8mX/9QQ2gAxkDAdabBYKMDFvcNZshqckp7dpUqLy0OdKmeVqe1k8Th0KxEva5yhou1PkngJ3hyBEz/6vveYLRpxyIaGhnrtrrU7rxv7wRdXwNdXSfSN89/HYDCQmJjYcqED/pFbTdi2Kd/qXpAaj/SKHurza+XXcrB6XvqYYe0mdDxoNUJLSI1AjMg9s5TmN+IA9gYW9K4fQAHejt1P8DSBzWZDp9NhNBoxJHjuteIfso3QcFetWsVbb73lHcmCyPj/+2h3k2tg5o9C4w0PgJfGNyNAtjwkNFVtIGT4Cpm2WOh48JjQKnb4O97Neb5w/Lj6UQqeMi2lpaUtMoHXfs5bLXhCu4uUB6hvbrNXQsVWdzsbMLN58AQYmI6KXK66KIoveCF2OFu3buX111/3mri7du1KQv/rxXprAVQ2UX7CXg6H3hfLHacJoVMbQwSkXg3DP4ephXDZERj0lt8mRvf8XPfeey/XX389M2bMIDm5TgG1VPfEZIoM+0XFCmIyIbAZU7cnkbS6YcEzoV8sngDg7NzWPlAnzzkveBRF8QqeMln4SOIN1W1/osBYX5KZR/CYckTFZPC3oddCkiTvy1d3xHh+SssLULYIjy+jrAWCJ6RL4wmgGp0vn8HTsTstPtNSUy99e5EgyudQuLzhKguNBBZ48Jjb/ExtTTB8+HAee+wx7rzzTrcfwW0s8GT6u9FoNOzbt4+SkhJ27tzpt+7CzuIDPjPrc2N9c/esW7eOnTt3erPMAchbLEb+AD0f8jP/tRrvM6pASbbv+5JaPp9GNB7AWyy0OU5J8Ei1RvEFv/pHYxX/7vOnNvUM1g4waMjcZs7x5dHEDCctLc2rmYaGhjJ16lQ0nkRS8E2y1hAH3/NFl6U/1Ph24C7NldaopqbRaOjatWt9oQNCg4p054l5foOkJsxsHkLdNdsa8PGAmNCunzsCf1071m075wWPy+UiIyNDJKu5xEMeF9jCGiWtxRP1VLJWzBtf20nbiOAB38vXEht5eXl5q/xbfniSJM05DY/0wCd4Ivs3fSxPx169X9iwS7N9HXxc4/6MdsNdtw1njX8nCmIU6hnBNyJ4PBpPSwUPiEGE0WgU1S8i3LaxWn4eD336iPT93Nzcesd/ehTesPopPWCye0DqcrlYvnw58+fPF6VcQHS6W9xzYwbGQ69HWtzWBok8z6f91RY2nhlHDVHCl1CH2sKjJc+wp2xL3SirFuMRPI5K/3vr0bh1IfUCRvyI6Osr5dSQ4CmuZWqMGUZERAQTJ06kc+fO3HDDDWLa96B4EYYPwtzWEC6bt9Bn3akZTgup1/r/X7sadWN4NB7LCf8ai7XwVNvekKdQVnkaBu0NcM4LHp1Ox+TJk7n62usplYVJID7oJDvu5vDeaHc2u0fwBMT4R5DUobbG01xNpzlz5vDcc8+xbFkTo6zGqP0y1naue3DZRRIhNC94OtTKWM9fLHxD4C46ObThfdqTuFG+zqWuua2s1rU3o/HYbLZ6hT5bhLvwZlOCB2DHjh1+65LDxCRgj50vAgM85OTkYLeLaESvU/nwR7771e/ZUy9RpDVAlNsMVXvQ5PXvjGgwryQqylcptSWCx6PZt1rb8RA/pta9rWVuK1zpa2dTOXjagFoDgwb8PB7/TkC0qBIPDBo0iBtvvNGv8rRX6y9a2XC5rGNfifB0aF7baQtSp/uWg1N8grEpavd
LNQcb3CTTrWBZnBKfLdl2Cg1sOee84PFQalK84dRxwa0s2NdSwnv57M8nfvSNGmOGN2lwb6mpwuFweH0O3vIarSGiry+UtCE/T9VeX6h1c4InJM330BYs8Qme6CGNR8y1J7pgb/5CvXweb2CBxtcB1aG2874585HJZOLIkSNUV1f7fBwewWPJA7O/jSIsLMxbVWLnzp31/CKDEmHWQAiqpQwcPCg6BZ1OJ/Z1VMF2d4BIRF//UO9TIdadz1O6TnSmjmqf36QRP6Ver/cK6pYInlYVB20IndFnSvP4eRzVvme6JaZej7mtKY2nmffWW7fNUVX/OIriC6EOS4dE/1yg04IxFTqIpFDSrm+Zk88zPQI0am4b1EFU0gdYm+NqfcHTk+API3iKqn1hwXEhp+myJMmn9eQv9iWyNZPp7XHOQtMvbm0f0Enlx2gDhTMdGvbzeAMLaF7wgE/rKVhayxl7FpjZPHjMbWUbfcmP4BM8Yb0aFZJJSUncddddPPbYYyQkJDR5mqNHjzJnzhxeeeUVXzBCTC2trwmtp7i4mKKiomYvxSN4OnXqJMKod70gQp8Bznul8Qiu1uIuGIrTJEKWS7J9PoMmnuPRo0czdepUhg0b1ug2ICZb8+ShnLTgAZ+5rXwLWAqEhuapL+iZQqEpPAEGljwRPOHBc93gP0leQ8Rd4BvI1TW35S/2aaPpDzY7Y2+bMeIrGLUQ+v29ZdsHJ/vMq41EtoUGQPdw0X/mKEmtytc6Wc55wbN9+3ZWrlzJ1j2+JMP40NMYJZ7sFjxOU60XtnH/DrRc8NSeAuCkX9qmAgw8gicgRlTlbg5Px+6o8pXPORsCCzx4zYGKXxl778i4ETMbiMjB2NjYFvkgPPdMp9N5pwkgtIdv/qI6AQYgkko9Duu65ra6VFZWeoVT165dwXQM9r4iViZe4ht5twWxtTrb4tU+M5smoMnfKyMjg759+/o9y41x8cUXM3DgwIad5C2ldrXq/F98/h1tcMt8KY0FGJRu8Amw2GYEjz7Mp9nWDTDY85L4GxgvtI/2whABSZMar2NXF0njNSc2FtkGMLKTyBcs1HVk5+69p9jI5jnnBc/OnTtZtWoVOw75ivHFhZ6+GmLEXuBfVqaZFxZEh3XxxRczffp0bx5PQ3gEj8FgOPnETE+AQc3B+rWqPCaVyP4tU9PjRvs/4JKm+Ze1PYnoK1588Jnb7JVQ7R6ENOWAbgUeTTQ6OtpXO0+j9XWADWg8wcHBXl9NQ+a22ni0HXALngPviHwsSQsDXmqTa/ASEO0LICheU2vityHN17NrAYGBgQwdOpTJkyc3q0k2SVgPX42yvEW18neGt6zTDe/lq8xRW/B4/DuSzl84NUa8289TvNpX47Bss69OYPe/NBiuf1bRTGQbwPAUd902KYCsvWUnVTOzNZzzgsdjn3e4Ha8hUhVGY0smPD9JtAafnRVE59OCF3bo0KGkp6c3KVBq28brzZXSUvwCDLb6lhWl5RFtHvQh/uaXyPPafA6eU0LSQIJ7euKCJe5rbD6woDZ2u73ZXJ5GfRbeAIONopxQHfr06YNeryclJQWbrYFpRN14BE9UVJRw5Be764PFj4Hw9Eb3O2k8Gnrxb76osVYUBm0XJMn3nuUv9gUJtFTj1uh84ce1Aww8/p3I/i3zVXoCDGS7LyBjz8virzYYuv2pZe05k9TO5WlEoAxOBI3bz3PYEcfx48dPa5POacFTu7yMWRLJEHGafG/Ns9NG7TDGmKbNbK3Bo/GcUv2ziAy8ia61O2HzcXcZHBouNdIYtaPbziYzmwdP+8y5Yp4lj4lR0jQrYBctWsTzzz/P559/3ug2iqJ4BU89M5NH8Dirobr+aDI9PZ2HHnqIqVOnEhjY8KjY5XJ5i2927dpVhOiWukfobfhs+eE5riXPV7evGcFjt9v5/PPPee2119i+ffvpaVddaodVe2YLbs0z6PHzlG0UHW7txNHm/DseYob5NKfCZSJ3L2eu+L/
LbQ1Pk3K24QkSclSCrbjBTcICoFesEDwFujR2727lnO2t5JwWPFar1RuCWiWLKLA4Tb7IRzidJE7yFQRtqmRFK3C5XN6CiqfklNWH+CJZavt5ymuFSbZU4wF/W3t8K2putRcejQeEuc0bWNDTb26khvBUhqioqGjUtNBk+ZfopgMMdDqdX1n8htBo/r+9Mw+L4sr68K9BZREF1EDiFvMxcYmaGMcNJ1HEfTejKDpuGDAuAUFFzUTHLRM0aNQoGTWMidHEJTpxXFDRGBeMuCPuxmgUBJVFQAQa6DrfH0UV3XQ3NNA0XZnzPo8PZd/b9966XVWn7jnnnmODgIAA9O3bV0zUlX5JVLMBVafW1LNJquTEb8aoWbMmEhISkJmZWaqdcufOnfjxxx9x545xe4LJuHvrpla3tS8W9qYgqdLUqaLd7PkdMWQNYPrc2toVe08+/qlEeJxg08dSnZjg2QYAXRqL4uBJjaa4cet2larbFC14tN1gMzRagqeMKNGVxq6eGOfM61BxNN0ySE9Px+bNm7Fy5UokJibqlefm5qJx48ZwcHCofMRnScWg7VItqdls7HQvxDLbehPosA5ou8hsQtasOLgXC9LkaOBZkeBxLdu+I7kIazQaPH9ueONcqbvwHRsVO2mk6jsYmIIU2aJLly5o1KiRVhw1VdXtl6rzOmCndY25tAFquRqvj9IjcEhoNBrcunUL8fHxOo4yFaamU/FDHyhafZTDDlXSwUBn42g5hLqkbku/KEYqAIAmwwEnM8eDrCq09/KU4mDgWeQLUqCyg1vrHlXqVm11QULLg7ZuPrVAtD241cyoQBCrCuD6pvFc6Qaws7OTo8+mpqbqefw4OTnBz8/PPG8Z9dqLm9uybooG0RqOxY4FLm1MS4CnTfPplR9TVfJyH1GwPv1ZVFUBJtl3tPfyZGRkGLS/aQsegx5d9TuJuXsMrHgkEhMTERcXB1tbW/TvX4bwlgSPSxuDwUfNgkolvvEn/lf8v4n2nQYNGiApKcnoiic9vdgoXalVuzYN+xd7lJVX1Vu3ueh5WPhctPNIqx2HRuULPaSdDlvKR9VyVvnGUp3Y1RNVguo0oy7VANCxkRzLGi/qvQlbM3nwG0LRKx5J8AhQIa1I8LjbZZXyjerD0dFRDMWB0l2qVSpVxR0LJKS3fRKK9xqV17FASUhu35o8yFHQyil4jIXOcXZ2RosWLdCsWTPDajNJ9ZMRL8azM0BcXBwuXryIuLg4nXBIarVa3vMCoCgLqLS5sYrsOxLab/wmCh7tYKGG3oa1r2uz5WlqNBiyzVLb3mgKKpvi6yD9QrFQL68K06Wdrvr+pXd193EpAcmzzYAtUsLZDnij6GeL1VfKmBVFCx5J1Wbr9BI0UtQCOxMycFYD2qoKU/OaVJh6bxcfp18SXYyl2GXlcSxQCi+9UyKpncokAevo6Cjv4zEmeFq2bAlfX19MmDDBcCOSOowKdb0ItWjbVowCnp+fr2P7uH79OlauXIkNGzaImXdf3BejIQNV77besD+AotTnJtrupOtXo9EYjPYgXde2trY6OY8qRd3mQPd9QNfvxWjM5UVyMEg7WxxlujxqNkB0ndeeo1ah5R9HdSN5tpWy4gGKw+ecTwI0VRjAQNGCp3Xr1ujTpw/+9FbxheTmUEb2zWpEemM0JHguX76M27dvG7U1lItarsWhfZ5dKl71AH/MFY+tne5u9rrNTXL7VqlU8gPS1PQIetT7M+Q3ciPqtqZNm8pqPO3NpJIbtVqtFr3eKmqDqAgubUU7ZZ9YwOEVk75S1kZobe8/G3NmAG40EGg2uux6hpDsPFIEaaBicyulqW7gWZyfSklIdp7suwZd/yW6FAUMfZ4P3DCDmc4YihY8jRs3hqenJ172eEv+zN3BQDA/K0F6Y3z27JmcrhsQXXajoqKwfft2XLx40djXy4d2BAOdUDmm26UUhaRuA/QyjpZGRaJU61DLuXhDpoEIBoAo4KQQOr/++ityc3P
13aiBYlWQvbtlDNcveQKub5VdrwhTBY/Z7DvmoGSUA1v7ir18NRoEDH0AeB+1XHgccyI5FAkFxdlLDdCpkfwahdgqTJOgwBnU54nWy4xbbcslMyov0g0pCILOgy4jI0NOQmU23bgkeDKvFT8QnTx0oy78kdDW/5cjYoGrqyvq1KljMChrWloajh49iri4OFEVZoxSIlVLSIJHEATcvHkTiYmJ8qbS118vCmkibVAsK3hlNVGjRg15hVhS1VbqfqfqpPZruvaZeh0qnsSwdlPrCJBbEUz0bHOxB1oVvTdUpZ1H0V5tEk+LBI8jsuHkWIGozhaiZF4TQzYfswkeycFAKBC9roA/pppNom5LcUNf2gWg2RiTv9a3b1/069fPYFliYiJOnxaFgYeHh9FNoKjfCbi/Gcj+TfQcMrCp8OWXX0aDBg3kBHENG4pu2La2tsXRqDOK1HDWFJaoBGPGjEHt2rXh6Kj7AH7+/Lm8p85s17A5UKlEYfO4KKRSVaswrRUnD8g+a1l3St0a0aUxcCMVOFdk57GtguWJYgVPWloa9u7dC2dnZzy06wfAEW62yVBVddSCSuDi4gIbGxsIgqBjU5D2PKhUKp3cJ5XCVcvBQNJv/5EFj0oFdI4su57e14yvLKQXAjs7u9LTkOtEqj4PNNQXZJK67fjx47h//z6ePHkCAGjWrJno4JB8HLJHnhU/HI0JFScnJ0yfPh2pqanifiRron7HYsFjxUK9SqnhIK7YXjwo1bMNALq9CjzIFAVQAQseXdLT0/HwoRgYNKWVGL1XjFpQ+ka46sTW1haTJ0+Gi4sL7OyKN8JJgsfV1VUMiW8OHNzF/Qq5WoraP7LgqQK0E5qV6uLu3FbcmCuoRXWbAcEDiN5tx48fh0qlktNb69l3bGqZ5ApubdjY2KBBgwbWZd+RcPcCrv9TtO9UtZu6NVOnhSh4yvBs69FM/FeVKNbGo61jfpYvPqzdbZKqPlxOJXF3d9cROkDxm7XZVRQlbR0seAzy+PFj3Lx5E48fP9b53GRjuW0trYCUxiMY1KtXDwEBARg0aJD8mZ59p55pQWerE41Gg7S0tIplbq0O3HsCXbcBPY4A9lYoGC2FdrDQakaxKx5JVeXg4ICU3KI9PDbJgF2z6htUBdA2ypr9bdG1vZgpFRDtDg5WpgKxErZs2YKcnBx07dpVDuUvCIJOOoQyqd8JSIsVVzxERp0DGjZsiNzcXLRo0QIZGRmialXQaEWJtm5VUE5ODlauXAlBEDBs2DC89ZboFVdQUGBSbqNqQaUCmvlW9yiqH8nBICdBVL+XEcuwKlGs4JFWPM7OLkjJFS94NwWseAAxBXZ6ejpcXV2Rl5cne0yZf8WjZedxecsqPaWsARcXF+Tk5OjY3Z49eybvzjfphUDybFOniu6qTq8Zrerh4QEPDw8QkajCy7guhnUBrF4V5ODggJo1a0KtVus4xaxduxaCIODdd99F584K29X/v4J2jMbnd8vlSm9uFKtqkx4SNeq4oZDE07BIZOpKkpycjE8//RT/+te/8OjRI9jY2MDLywtt2rTBK6+YtpHPZLQDZbKazSiG9vKUGhzUEGWkwjaEbDdK1d44WnqU6OrGULDQvLw8PH/+HC9evIBtVQb4YiqHiS7VlkCxgkda8QiOxasEUfC4VNOITMPV1VUOpJiamoratWuje/fuGD58ONzc3MzbmWNjoF5HACqg8VDztv0HwlD0AknwmOxp6ORR7NhiouCRkew7Th6iU4iVUzICh3a0aqt0LmBEHJuITjBAqekRLIEiVW3aYew19sUPBfdaWeWPvGxh7O3t4eTkhOzs7KqP2aZSAb1jxKi8DpVIQ/wHR1rx5ObmQq1Ww87ODs2bN0etWrVMf4tXqUR1W/LhUh0MDCIHr7RuNZuEJFzS09MhCIJOCgQWPFaMja2YEiPzWpmebVU+lIp+MScnB/3790fLli3RunVrrF271pzjKhVtj7a8GsWh493sjacXtib
KymtiVmxrsdApA+2AlpK67aWXXkLHjh3h5eVlekOSnSf9krhx1xRyHxcHcLXi/TvaSCsejUaDjIwM+QXK3t7eYAQIxoqwEs+2Sqna5s2bh1u3buHs2bOIiIiQgx5WNc7OzpgyZQp8fX2hsRdvAnvkoK69lXrVlEBbVfHDDz8gKipK3pPEWB5T0iOYhCR4NLlA5nXTvqNt37FyjzaJkhE4TN7vxFQ/kp0n67bofVlNVFjwODo6ont3MTGTk5MTWrRogeTkZLMNrDRsbW3h7u6OFi1a4FmBqLN0s0mGyt66HQskpBs3IyMDN27cwPnz582TsZGpEM7OzvIDs8JRqgHdtMym2nmkiNQ16wJ136h43xakXr168nylpaXJ1y6r2RSA5NlWkCF6YFYTZjGIJCQkID4+Hu3bmx6c0VxIcdqsPWqBNoZuUL5pqw9bW1u0atUKNWvWRIMGDZCQkICoqCg0aNAAvXv3NpiZ1CD2bkDtZqI7ddo54E+Ty/6OtOJp4Cnq4BVAjRo14ObmBhsbG9SoUUNeJfI1rABKerbZV09cvTIFT7t27eTIydpER0ejYcOGUKvVGDVqFMLDw43qdyMjIxEZKcbRevr0aSWHLG7uk/J9SILH3VYZe3gAwzeoVQVW/B/Ex8dHPr548SIeP36Mx48fY8CAAeVrqH4nUfCkmuBgoMkD0ovSYCjEviMxZcoUAKK9NSYmBllZWSx4lIC24Mm6XW0OLWUKnri4OKNlRITx48djwIABGDFihNF6/v7+8Pf3BwB06VKBLIIl2LJlC1JSUtC2bVs8fSGGw1fCHh4JZ2dn1KlTR/bMc3R01Iv2y1QfkrG8du3acrpyk6nfCXi4U7TxpF/W3cRbkvSLgFCUuFAh9p2SODs7IyQkBPn5+eZN/sZUDfYNxMR/uclASgzgMalahlGpK+Wjjz6Co6Mj5s+fb67xmERGRgZevHiBgoJCPH0hGsjEcDnKEDwqlQozZ86UA0Tyase60DaWl5umIwBbRwAE/DIGKCwlFbtk31HZFKfQVii1atUyX4Bbpmp5uShpYtLBanMwqLDgSUxMxPLly3Hu3Dm0a9cO7dq1w+HDh805NoMIgoCsrCwAgK1TAxQIopFTDBCqDBuPBBtlrYfMzEycPn0a+/fvx4MHDwBU8Hep/Srw59XicdYt4PJs43Ul+47Lmyal6rYmCgsL8dtvv+HcuXOV8wRkLI+UiyfvsW52YgtS4VeUxo0byzvwLcnz58/lGFram0eVpGoDgPz8fHk/Eq94qp+srCwcPXpU57MKvxB4+ANJUWICvl//BTQcIKZO1oZIN+OowsjLy8PWrVsBiC7o3t7e1hsklNHllT7iKpsEIPlg6ergKkJxSlntzaPqmi7ysRJSImijvXnU2dm5lJqMJdDeyyNRYcGjUgGdvhJ16QAQO0ncKKpN9m+AusiFXoGCp3bt2nJ6j9jYWJw6daqaR8SYTC3X4piASQerZQiKEzza+yxyVMXqCSXZeACgTp06cmbL114zHsmYsQy1a9fWs1FUSgVq3wDo8o14rE4BYv109ekp2htHlREqRxuVSqWTV4rVxQrjlSJ1W+oZIN/yqlLFCh5bW1tkFooXvh3y4Kx6pqgVj5OTE0JCQhAUFKSXGI6xPCqVSid0To0aNSq/En2lD9AiRDxOPgTcWVdcJtl3HF4R7UIKhwWPwpDsPKQBko9YvHvFCZ7iPDzOeJojOha8ZJMspppRmHOBnZ0d68WtCEndVqdOHfztb38zT/iXdp8CLm3F48uhYu4dQNe+o9AwM02bNpWPTUqWx1gPru0A+6IYjsmWV7cpTvBISdNcXFx0oxbY2AG25dxzwTBaSCseGxsbNGvWzDyN2toDXb8Xr09BLbpY5z4pjuWmQDWbRJ8+fdCkSRN069aNV+1KQ2UDNOwnHicdEh0NLIjiHO99fHxQUFAAtVqNH6PEz9xtkkT7jkLfHBnrQFrxZGVlQaPRmC+pmUsb4O3PgIszgIx
44OQQAEX2HgU6FkjUqVMHkyZVzwZExgy80h+4902xW3U9y4U8U9yKBwBq1qwJJycnPNGJ06Yc+w5jnTg5OQEQI3Joe0+aheaBwCtFb5hSAFEbO8DV8q6sDAMAeKU3oCp6ubKwd5siBQ9QtA2CBQ9jRpo0aQJA3IVvdtubSgV0+Rqw0zLC1+8o5ktimOpA263awnYexanaJDLVgFojHosBQpXlWMBYHy4uLrLqqE6dKogk4PAy0HlTkaoNirbvMH8QGvYXY7alngHU6RbbkqLYFY/kWAAobw8PY700adJEXvlUCY0HA3/+Amg4EGgRXHX9MIwpSPt5SAAeW86t+g8heJQWtYD5H6dFIOC1n1OSM9WPa7viCBsWtPMoVvA8KbniYcHDMAxTPlSqYqeXZMu5VStW8EgrnprIh6sqjVVtDMMwFUGOVv0EeHbZIl0qVvBou1KrVABqsnMBwzBMuXnZ8m7VihU8OlELAF7xMAzDVIRaLsUbmVnwlE6x4EkSD9jGwzAMUzEkdVtarOhWXcUoVvA84RUPwzCMeWio5VadHF3l3SlS8BAZULXxBlKGYZiK4fJWsVu1BaIYKFLwZOUDeYXisbtNEgAVUJOzeDIMw1QIlap4M2nSwSp3q1ak4NGLWlDLVQzzzTAMw1QMSd2mTgHSL1VpV4p8WutELbDlqAUMwzCVxoJu1YoUPHpRC9ixgGEYpnLUci4OXJsUVaVdKVLwSCueGihEPVUqOxYwDMOYA8nOk3YWUKdVWTfKFDzZ4t+XaqTBRkWsamMYhjEHkp0HVKVu1coUPDniXzebx+IBCx6GYZjK4/Im4NBQPK5CO48yBU+Rqs3dJlE8YBsPwzBM5VGpilc9VRitWpGC50mRqs0NCeIBr3gYhmHMwyv9gTqvA6+OBgpflF2/Aigu9TWRtqpNitPGzgUMwzBmoclfgabDq7QLxa14svOBnALxmAOEMgzDmBmVqsq7UJzgkVY7AAcIZRiGUSLKEzzaUQt4xcMwDKM4FCd4JMcCAHCzlSJTs+BhGIZRCooTPJKqzQYC6qtSxP+wcwHDMIxiUJzgkVY8L9llw1YlADVqA7a1qndQDMMwjMkoTvDICeBqZooHrGZjGIZRFMoTPEWqNvcaqeIBCx6GYRhFoTjB8yxX/Otm+0Q8YPsOwzCMolBc5ILDfwMy1YDm53XAM/AeHoZhGIWhOMGjUgEu9gCE++IHrGpjGIZRFJVStXXr1g1vvfUW3njjDSxZssRcYzINdbr4lwUPwzCMoqjUimf//v2oW7cuCgsL8c4772Dw4MF4++23zTU24xAB+UWCh1VtDMMwiqJSK566desCAAoKClBQUACVBYLLAQA0OYCQLx6zcwHDMIyiqLRXW9euXeHm5oZevXqhXbt2ButERkaiS5cu6NKlC54+fVrZLoH8Z8XHrGpjGIZRFGWq2tq1a4fCwkK9z6Ojo9GwYUP88ssveP78OUaMGIFr166hTZs2enX9/f3h7+8PAOjSpUvlRy3ZdwAWPAzDMAqjTMETFxdXZiN16tRBz549cejQIYOCx+zkawketvEwDMMoigqr2jIzM5GSIgbpVKvVOHz4MFq2bGm2gZVKPq94GIZhlEqFvdoyMjIwfPhw5OfnQxAEjBw5EoMGDTLn2Iyjo2pj5wKGYRglUWHB8+qrr+LChQvmHIvpSM4FqhpADafqGQPDMAxTIRQXqw2A7h4eS7lwMwzDMGZB2YKH7TsMwzCKQ5mCh8PlMAzDKBZlCh55xcOOBQzDMEpDoYKnyLmAVzwMwzCKQ6GChwOEMgzDKBVlCh628TAMwygW5QkeoQAofC4es42HYRhGcShP8HBkaoZhGEWjbMHDNh6GYRjFoTzBwykRGIZhFI3yBA9HpmYYhlE0Chc87FzAMAyjNBQoeLSdC1yqbRgMwzBMxVCe4JFsPDWdAZsKZ3VgGIZhqgnlCR6OTM0wDKNolCt42JWaYRhGkShP8Kg5MjXDMIySUZ7g4cj
UDMMwikZ51vkGXYCaToDr29U9EoZhGKYCKE/w/HlVdY+AYRiGqQTKU7UxDMMwioYFD8MwDGNRWPAwDMMwFoUFD8MwDGNRWPAwDMMwFoUFD8MwDGNRWPAwDMMwFoUFD8MwDGNRWPAwDMMwFoUFD8MwDGNRVEREluzQ3d0dr732WqXaePr0Kdzc3CpVRylt8Ditrw2ljNMcbfA4ra8NaxmnKdy/fx9PnjzRLyAF0rlz50rXUUobPE7ra0Mp4zRHGzxO62vDWsZZGVjVxjAMw1gURQoef3//StdRShs8TutrQynjNEcbPE7ra8NaxlkZLG7jYRiGYf63UeSKh2EYhlEuLHgYhmEYi8KCp4KwhrL8lDVnpsypOdqobB9KwVLzaYnfxFqo7LnyfIooSvC8ePHCpHqCIBgty8nJKfW7WVlZpZYTEe7fvw+VSmXwAiAiJCYmljm+O3fulFp+4MCBUttIS0srtRwwfb5MobQ51aa0m8LYnJlaLtUx1m9hYaHRcqnOzZs3AZR+PqW1UR5KOxdTHx5lzXtZ51GZ+SytXKqTmZlplt9Vu8+KlFtiPgHTrmFDWON8loap93tFUYTgEQQBU6dOxYwZM3Dw4EFkZGQYrPOPf/wDAGBjY6M36YIgIDg4GMHBwTh58qTB70+bNg0ffPABjh8/bnQsn3/+OXx9fXH79m29C0AQBIwbNw6HDh0y+n0iwoABA7B3716j5/ree+/Bx8cHly9fNlju5+eH6dOnY9euXQYvLkEQMGPGDMyePRtHjhxBZmam0b4WLFiApUuX4uHDh8jOztYr379/PwDDcyrV+fTTTxEWFoYXL14YnJP58+fj448/NnpTzZ07F++//z4AwzeVIAiYMGGCzpxp1yEi9OrVC9u3bzd4nlIbffv2ha+vr8Hzka6xOXPm4MqVK3p9SHU++eQTLFu2DImJiXqCXRAErFq1CqtXr0Zubq7eQ0YQBPnaMvbwEAQBERER+PLLL42OMzw8HMuWLQMRGfxd5s+fj3/+859G+xEEAR9++CGOHTsmf1ayj/feew+7d+82WC7V8fLyQs+ePeV+SpbPnz8fCxcuRFZWltFxREREICIiwuh8WcN8AqXPqZLms7L3u7lQhOD5+uuvYWNjgwULFiA+Ph47d+5EUlKSTh0/Pz+Eh4cjJCQEgP7FMXHiRNjb22PChAlYvHix3kN90qRJqFu3LmbNmoUNGzbotS/RrFkz1K5dG8HBwbhw4YJOPyNGjICdnR38/f0RExODa9eu6X3/xIkT6N69O2bPno1vvvkGMTExOheAv78/3n33XRw4cAARERF6u34XLFgAe3t7rF27Fnv27MGtW7f0+li2bBkEQcCsWbMQExODL7/80uAKaenSpSgsLISHhwe2bNmCAwcO4OnTp3K5t7c3hgwZgm3bthmcUyLCkCFDkJ2djcLCQkyePBkajUa+4IkIw4YNg0ajgbOzMxYsWCC3I6FWq+Ho6Ij4+HhMnz4dgiBApVLpvHF5e3ujefPmGDJkCO7cuYP8/HydNrZv346hQ4di7Nix+Omnn/D06VPk5eXpjHP06NHo1asXhg0bhoULF+qNIyAgAHXr1kW3bt0QERGBgoICvRs3MDAQubm5aNy4MTZu3Ih9+/bprJADAwORnJwMlUqFf/zjHzh79qzOOHr06IERI0bgu+++MzqfgwYNwu+//46EhASMGDFCZ5zSfD59+hQvXrxAYGCgXjvZ2dlISEjAiRMn8Omnn8rzqd1P79690ahRI3h7eyM1NVVvLtasWQMvLy8MHz4c8fHxyMvL0yknIowZMwb9+vXD4MGD8dVXX6Ekvr6+UKvVaNCgASZPnqzXBwCMHz8eiYmJyM/Px5w5cxAbG6szTmuYT1PmtKz5XL16tdnms379+qXOZ0JCgtH5XLx4MQoKCoze715eXqXe72bFtH2m1Ut8fDzNmTOH8vLyKDk5mb7++mv65ptvSKPRyHXu379PRETz5s2jadOmyZ8LgkB
qtZqioqLkz7Zv304rVqyQy/Pz8ykmJkYu9/X1pcDAQPrhhx8oPz9fZyz5+fm0f/9+Onz4MPXt25cuXbpEiYmJRER08OBBmjhxInl4eNCcOXOob9++tHv3bhIEQf7+w4cPadq0aTR06FBasmQJBQYG0vr16yktLY1evHhBR44cISIijUZDy5Yto/j4ePn/RESHDx+msLAwysvLo0mTJpG/vz9t27aNnj59Kvdx/PhxioiIICKiR48ekbe3N23evFlvXr/99ltavXo1ERFdvHiR1q5dS/v375fn5aeffqKEhAQaOnQoffvttzpzSkSUmZlJGzZskD+fO3cubd++Xa7z/Plz+u677+RyHx8fWrNmDV29elVnThISEmjPnj20ZMkSmjhxIhERZWdny+Xz58+nKVOmULdu3cjPz49Gjx5NZ8+elfuJjo6m2bNn07Bhw2jatGn04Ycf0o4dOygrK0ueg02bNhERUUpKCn3yySf0+PFjnXNZuXKlPNaRI0dSWFgYnTlzhp49eyaPIyIiQp6fH3/8kfr16yf/XoIg0M6dO2nfvn1ERPSf//yHlixZQteuXSMioqSkJDp9+jQlJSXRkCFDdOZT+m1jY2N15mvu3Ll0+vRpuf1z587R7t275fKAgAD6/vvv5TFK53vhwgXasWMHrV69mhYvXkxEJF/Hd+/epU8++YTCwsLIy8uL3n//ffrrX/9KT548kfuJjY2l1atX09ChQykgIIDGjh1LsbGxcvnRo0dp27Ztcl/h4eHymARBoMLCQlq/fr38G02cOJH+/e9/U0JCAmkTGRlJN27cICKiVatWUY8ePejXX3+V29m1a5c83yXnMyEhgU6fPk3JyclG5zMmJoa+//57o/P5yy+/0I8//mh0PgVBkI+NzemtW7d05tPf319nPgsKCujMmTOlzufhw4dLnc+8vDzasGFDqfNZUFBAmzZtMjifgiBQSkoKHTt2jFauXElEuve7IAj04MEDunXrFj148EBvPrXvVXOhiBWPh4cHGjVqhBMnTqB+/fro06cPTp06hbi4OLlO06ZNAQBhYWGwt7fH9OnTAQCPHz+GSqVCz549QUQQBAH169fH7du3AQCJiYmoWbMmunbtCgA4e/YsHBwcMGzYMPznP//BxYsXdcaSm5uLffv2oW3btli0aBF8fHywefNmAEC/fv3g6+uLkJAQLF++HCtWrMCOHTt02mjUqBFatWoFBwcHBAYGYtGiRUhNTcXly5fh6OiIXr16ARCXurVr18bnn38u/x8AOnbsiFatWmH8+PG4ffs2QkJC8OuvvyImJkbuo2XLlkhPT8fGjRtx/vx52NjY4NChQ7h37568ZAeAXr16IT8/H1evXkX79u3Rvn17HDhwAPPmzYNKpYK3tzcaN26ML774Ajt27MCWLVsAiPalhQsXom7duvDz80NBQQEAoHnz5sjIyIAgCPj444/h5OSEMWPGAAD27duH7Oxs5OfnY+XKlZg9e7Y83uzsbPz8889YsGABWrVqBU9PTxw7dkxWnS5duhRubm4YPHgwNm3ahL/+9a/YsGED5s6dC5VKhR49esDR0RENGjRAREQE3n//fVy/fh2pqalYvHgxGjZsCD8/PwBArVq1kJKSgv/+97+y6gEQ31pTUlLQv39/pKSk4P/+7/+wd+9e3L17V56vN998E9euXUN0dDTc3d1BRNi7dy8CAwPxwQcf4MqVK0hLS0NWVhbee+89vP7661i2bBmCg4OxcOFC5Ofn45VXXsHq1auxfft2eT7z8vIwa9YsREZGwsXFBQBQWFgIBwcHPHjwQF69bty4EfXr1wcgagFu3bqFy5cvY+zYsQgKCsKsWbNw8uRJuLq64sCBAxgzZgwcHR0xaNAgnD17FjNnzsSyZctQv359aDQaDBkyBJGRkejevTumT5+OoKAgTJkyBVeuXEFOTg5atGiBjRs3ytd4ZmYm5syZg+3bt+PVV1+Vr+fY2Fj8+9//ltXZU6dOxc2
bN/HgwQP07NkTd+7cQVZWFhYuXIiEhAQEBQVh8uTJOHbsGGJjY3Hv3j14enpCEASsXr0aU6ZMwZgxY3D58mUkJyfrzef06dMRGhqKvLw8vPzyy/J8fvvtt/J8BgUFYe3atTrz6ejoiN9//x2CICAoKAhr1qzRm89Lly5h3LhxUKvVmD59OqZOnYoTJ07AxcUFBw4cwOjRo+Ho6IiBAwfC19cXCxcuhKurKwoLCzFkyBB89dVX6Natm/zd8ePH48qVK8jNzZXnc8SIEfJ8BgcHY9OmTWjSpInOfEZGRsrq/4kTJ+LmzZv49ddf0atXL735nDp1KsaNG4ejR4/i1KlT8nxqNBqsWbMGK1aswMCBA5GdnQ03NzfExcXJ93tUVBTCwsLg4+MDtVqNpk2b6l2fz549M7/Nx+yirIq4f/8+LV26lHbv3k1qtZpWr15N69at05HG2sdLliwhT09PGjRoEGVkZOi0lZWVRStXrqQDBw5Qv3796Pnz53JZbm4uFRYWEhHR8uXLKTw8XE/iX7x4kb7++msKDAykoUOH0ogRI6iwsFCup11/6dKl8tu2xKNHjyg4OJg2bNhAWVlZtG7dOpozZ47B7wcGBtKXX36pNx+xsbHyqu2LL76gCRMmyOOW+ti5cyeFhITQkydPaOvWrXT8+HEaP3482dvb06xZs4iIaOfOnbR8+XL65ZdfiIioVatWVKtWLQoODtbp7+HDhzRy5Ejy8/Ojli1bkp2dHc2cOVOnzvnz52n79u3Uo0cPsrW1paCgILlMWhUa6yMyMpKio6NpxowZ1KZNG2rZsiXZ29vTjBkz5Dra5/f6669TrVq1KCQkhIiIoqKiaOzYsfKKY+7cueTp6Ul2dnYGz2XAgAHUp08fsre3p9DQULnsq6++kt+S//73v9Pbb79N9vb2NHfuXCIiio6OprCwMBo1ahTdu3eP2rdvT/7+/nT+/Hnq1q0bDRs2jCIiIuQVVatWrWj8+PEUExND3t7edO7cOZ35nDhxIr3xxhs0a9Ys+uWXX8jb25suXrwo93XkyBHq0aMHdejQgU6fPk3e3t509epVnRVu8+bN6W9/+xvFxMRQz549KSEhgfbu3UvXr1+ngIAAatu2Lf35z3+m0NBQio2NJW9vbzp8+LDOnPzpT38iPz8/io2Npe7du5Ovry9NmzaNrl+/TkREQUFB1KNHDwoNDdU7l99++43GjRtHQ4YModDQULpw4QJ5eXnRuXPnKDIyUr62goKCqEuXLhQaGkqXLl2ibt260bhx42jVqlU0YMAAunv3LnXo0IECAwPpypUrNGLECJo0aRJ98cUX8gpCms/z58+Tr68vPXjwgIjEFdCoUaNo4sSJ1Lp1a5o1axZduHCBfH195Wvv6NGjdPToUfL29qYOHTrIbTx9+pQePXokz8W0adOoX79+NHfuXDp37hyNHj2acnJyaP/+/XT16lUKCAggFxcX8vLyokuXLtGoUaMoLi7O4HxeunSJfHx8KDg4mCZPniyvSAIDA8nb25tCQ0Pp3LlzOudy//59Gj9+PA0bNoxCQ0MpPj6eRo4cSXfv3qWNGzfSmTNn5DY8PT1pzpw5dO3aNRo+fDgFBQXR559/TgMHDqS7d+9SeHg4bdq0iXr37k19+vShqVOn0rZt2+jUqVNERBQSEkIfffQReXt7U79+/eSV2P3798nX15cmTpxIgYGBlJubS+ZEMYKHSLy4Nm/eTH5+fuTt7U13797VqyM9tKOjo+nVV1+Vf2ht0tLSyM3NjTp16mSwnIjohx9+oM6dO9Nvv/2mV5aamkrTpk2jhQsXEpHuA1Gbbdu20RtvvEH37t3TK3v06BFt3bqVAgICaODAgXT79m2D53Hs2DFat24dqdVqnfLk5GRasGABhYaG0jvvvKP3fW2OHTtGbdq0oQcPHsgqydDQUFn4REVFUVhYGE2aNIk6d+5MDx8+1FNZEhGtWbOGmjVrRtHR0UQkqjWnTp0qlx8/fpzc3d2
pc+fOdP36db1yIlHQdezYkR48eEDz5s2jDz74gIiI4uLiyNfXV57TK1euEJEoQEqOY9u2bfTmm2/So0ePaM6cObLwOXToEH322WcUEBBAvXv3puPHj8vjLNnGli1baNWqVaTRaGjOnDk0ZcoUIiL6+eefaeHChfTxxx+Tt7c3HTt2TJ4vqR8iory8PDp48CC1bNlSfiju37+fwsLC6IsvvqBPPvmEpk+fTi1btqSkpCQi0lXxSvPZtGlTWr9+vfyZdp2oqChq0qQJvfXWW7KKadu2bTptfP/999SiRQv5wfndd9/RihUraNeuXTRw4ED6+OOPKSsri3bt2iV/Z8eOHTptbN26lTw8POTz2LdvHy1btowiIiIoLCyMZs6cSX/5y1901LXa4ywsLKRFixbRp59+qvMbrVixgiIjI2n+/Pm0cuVK6tq1q85L2L59++izzz4jIlG1Gh0dTa1bt6a0tDQiElVfU6dOpXfffZeWLl1K06dPp9atW8vzKanEJfWuNJ9btmyR+5Dq7N69m/bs2UNNmjSh9u3by/M5atQoCgoKoh9++EGem06dOtGePXvkNkaOHElBQUEUFBREw4YNo3nz5tGxY8coMzOTiIjGjBkjq+al+WzevLk8n++//z5NnTqVvLy8aOnSpfJ8aqtNtc8lJyeHFi9eLKvBtcv79u1LH330Ea1cuZI8PT1p586dch1/f3/68MMP6fDhw/T8+XP6+eefydPTkx4/fkz//e9/6eTJk9SrVy+aO3cuLV68mCZNmkTdunWj3377Tcd8cP78eSIiWrduHTVt2tToM7IyKErwSKSlpem88Rni4cOHsr64JIIg0NKlS+nOnTsGy3Nzc2ndunWlPszT09PlY0OCJy8vj7755hu6efNmqeNMS0ujlJSUCpVnZmbSzZs3ZWFijNOnT8vnom0XmzFjhrxquXfvHh0/flx+6yIimjlzpvzATklJoR07dtCNGzd02tCuI9mvtC9U7fJz585RcHCwfNNL5dKqZteuXVRQUEBEJP8t2cbFixdp9uzZsmAqeR6///47nTx5khITE42OMzU1lU6fPi2vSojENz9JsOzfv582b95Md+7c0WkjJCREFlCPHj2i9evX06VLl0gQBNJoNHTo0CF5dXX69Gk6ceIE3bp1Sy4/cuQIBQQEyN/fu3cv3bhxg9RqtU4df39/IiL66aefaPjw4XTt2jWDbfz8888UEhJCcXFxOuXSSlPb/layD6mNo0eP0owZM+jy5csGz+PMmTN08uRJun//vtE20tLS6NixY/T777/rlEu/65dffinfC9ptREdHy+d6+/Zt2rBhg2zTjI2NpXHjxlFsbCyNHDmSdu3aRWfPnpVfNmNjY8nPz49++ukn8vX1pVOnTtGePXvoxo0b8kubVOfo0aM0ZswY2rJlC40dO1a+F7TbGD16NG3ZsoVmz56t18bEiRPp1KlT5OPjQwsWLCAi0utDamPr1q00b948unr1qs55nDlzhkaOHEk7d+6kCxcu0L1794y2ER8fT7GxsfJKTRrDmTNnaMSIEfT3v/9dvhe125gwYQIdOXKERo8eTVFRUbR+/Xq6evUqZWZm0gcffECJiYl0+fJl8vDwoIiICHr48CE9fPhQLk9KSqIzZ86Qh4cHhYeH04ULF+QVr7lRpOAxB9oPNkMYW8WUpDTDm/ZDy5rQHvOiRYuoa9euNHjwYFnlWFJl2aVLFxoyZIiOwd+YWrOk0V4qf+edd2jgwIHym7l2+cKFC6lbt240ePDgUvv4y1/+YrSPRYsWkaenp855GDuXwYMHy4b4ioxD6kd645XIysqi8PBwOnjwIPXv319PPSGpePfv30/9+/ennJwcKolU5+DBg9S7d2+9lwqp/NChQ9SnTx8dFaZUvnz5cjpy5Aj169ePcnNz9a7Rkm1IKwhTz0O7jX379hk8l6ysLPrss8/kcbx48cJoG5LKW1slrq3yDg8P11mhlSxftmwZrVixQu88S7a
xbNkynZc47fIVK1bQ8uXLdV4oS9ZZtWqVbPiX7m1DbWhfF2WdR8k6YWFhpZ7rqlWrymzjs88+o/DwcMrLy5PLJfPAhx9+SIMGDSIfHx+d+dI2HwwePJh8fHz0+jAn/7OC53+dkirJkm82ZZWXrNOkSRO9JXnJ8pKrP1PUouXpw5RxGuqnvG0YUz+UpcI1RcUr1enQoQPdunWr1HJDq+ny9lHZNsxxrh07dixT5W1IrW5KuVSnQ4cOBlXeUnnHjh2Nlps6jtLaMHWcZfXRqVOnMtvo1KmTnomgpHmg5It3WeXmhgXP/zilqSRNKTdHG5bowxJtlKXCLavcHG1Yog9LtVGWytsUlbg1tGEt4yzLPFBWuTnhtAgMY0YKCwtRo0aNCpebow1L9GGpNjQaDWxtbStcbi1tWMs4AXHDalmhe8wVNsoYLHgYhmEYi6KIDaQMwzDMHwcWPAzDMIxFYcHDMAzDWBQWPAzDMIxFYcHDMAzDWBQWPAzDMIxF+X8JBTY47/3dQAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for l in range(layer_num):\n", + "\n", + " fig, ax = plt.subplots(1, 1, figsize=(7, 5), dpi=70)\n", + " ax.plot(list(range(len(tokens))), mag_dict[f\"tc_min_{l}\"], label=\"FP_min\", color=\"gray\", linewidth=2.5, linestyle=\"--\")\n", + " ax.plot(list(range(len(tokens))), mag_dict[f\"tc_max_{l}\"], label=\"FP_max\", color=\"gray\", linewidth=2.5, linestyle=\"-.\")\n", + " # ax.plot(tokens, mag_dict[f\"tc_mean_{l}\"], label=\"tc_mean\", color=\"r\")\n", + "\n", + " ax.plot(list(range(len(tokens))), mag_dict[f\"0_min_{l}\"], label=\"1SB_M\", color=\"orange\", linewidth=2.5)\n", + " ax.plot(list(range(len(tokens))), mag_dict[f\"0_max_{l}\"], label=\"1SB_M\", color=\"orange\", linewidth=2.5)\n", + "\n", + " ax.plot(list(range(len(tokens))), mag_dict[f\"1_min_{l}\"], label=\"1SB_O\", color=\"dodgerblue\", linewidth=2.5)\n", + " ax.plot(list(range(len(tokens))), mag_dict[f\"1_max_{l}\"], label=\"1SB_O\", color=\"dodgerblue\", linewidth=2.5)\n", + "\n", + " # ax.plot(tokens, mag_dict[f\"2_min_{l}\"], label=\"1SB_LSM_M\", color=\"b\", linestyle=\"-.\")\n", + " # ax.plot(tokens, mag_dict[f\"2_max_{l}\"], label=\"1SB_LSM_M\", color=\"b\", linestyle=\"-.\")\n", + " # ax.plot(tokens, mag_dict[f\"{model_num}_mean_{l}\"], label=\"st_mean\", color=\"dodgerblue\")\n", + " # ax.set_tit#le(f\"FFN Output Layer {l}\", fontsize=20)\n", + " ax.legend(loc=2, fontsize=15)\n", + " plt.xticks(range(len(tokens)),rotation=45, fontsize=8)\n", + " # ax.set_xlabel(\"Token Number\", fontsize=20)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 1217, + "id": "cc100017", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# model_num = 0\n", + "# for l in range(layer_num):\n", + "\n", + "# fig, ax = plt.subplots(1, 1, figsize=(15, 5), dpi=50)\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"tc_min_{l}\"], label=\"FP_min\", color=\"gray\", linewidth=2.5, 
linestyle=\"--\")\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"tc_max_{l}\"], label=\"FP_max\", color=\"gray\", linewidth=2.5, linestyle=\"-.\")\n", + "\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"0_min_{l}\"], label=\"MI\", color=\"tab:orange\", linewidth=2.5)\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"0_max_{l}\"], label=\"MI\", color=\"tab:orange\", linewidth=2.5)\n", + " \n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"1_min_{l}\"], label=\"MIXED\", color=\"tab:blue\", linewidth=2.5)\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"1_max_{l}\"], label=\"MIXED\", color=\"tab:blue\", linewidth=2.5)\n", + " \n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"2_min_{l}\"], label=\"OI\", color=\"darkgoldenrod\", linewidth=2.5)\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"2_max_{l}\"], label=\"OI\", color=\"darkgoldenrod\", linewidth=2.5)\n", + "\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"3_min_{l}\"], label=\"OI\", color=\"r\", linewidth=2.5)\n", + "# ax.plot(list(range(len(tokens))), mag_dict[f\"3_max_{l}\"], label=\"OI\", color=\"r\", linewidth=2.5)\n", + " \n", + "# ax.set_title(f\"ATTN Output Layer {l}\", fontsize=20)\n", + "# ax.legend(loc=2, fontsize=15)\n", + "# plt.xticks(range(len(tokens)),rotation=45, fontsize=8)\n", + "# # ax.set_xlabel(\"Token Number\", fontsize=20)\n", + "# plt.show()\n", + "# # break\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1684, + "id": "d73aed1f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:05<00:00, 2.28it/s]\n", + 
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:05<00:00, 2.30it/s]\n" + ] + } + ], + "source": [ + "st_2 = ranking_loss_func(student_probs_2, teacher_probs)\n", + "st_3 = ranking_loss_func(student_probs_3, teacher_probs)\n", + "# st_4 = ranking_loss_func(student_probs_4, teacher_probs)" + ] + }, + { + "cell_type": "code", + "execution_count": 1717, + "id": "fb9c4ae5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Ranking Loss(CoLA)')" + ] + }, + "execution_count": 1717, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(1, 1, figsize=(8, 5), dpi=120)\n", + "fs=20\n", + "lw=2.5\n", + "# ax.plot(list(range(layer_num*head_num)), st_1, label=\"Direct Q\", color=\"tab:red\", linewidth=lw, alpha=1)\n", + "ax.plot(list(range(layer_num*head_num)), st_2, label=\"TI-Output + Map\", color=\"darkblue\", linewidth=lw, alpha=0.8)\n", + "ax.plot(list(range(layer_num*head_num)), st_3, label=\"TI-Output\", color=\"tab:blue\", linewidth=lw, alpha=0.8)\n", + "# ax.plot(list(range(layer_num*head_num)), st_4, label=\"Ternary\", color=\"tab:orange\", linewidth=lw, alpha=0.8)\n", + "\n", + "ax.tick_params(axis=\"x\", labelsize=fs)\n", + "ax.tick_params(axis=\"y\", labelsize=fs)\n", + "ax.legend(fontsize=fs, loc=2)\n", + "ax.set_xlabel(\"Head Number\", fontsize=fs)\n", + "ax.set_ylabel(f\"Ranking Loss(CoLA)\", fontsize=fs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1318, + "id": "e54c005d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "COVER MEAN CHECK\n", + "0.7350823283195496\n", + "0.6579217910766602\n", + "0.6787551045417786\n", + "0.654063880443573\n", + "KL DIV CHECK\n", + "21.967914581298828\n", + "27.941110610961914\n", + "21.092199325561523\n", + "14.090266227722168\n" + ] + } + ], + "source": [ + "cover_mean_check = True\n", + "kl_div_check = True\n", + "student_probs = student_probs_2\n", + "exclude_sep = False\n", + "layer_num = 4\n", + "if cover_mean_check:\n", + " print(\"COVER MEAN CHECK\")\n", + " top_k = 5\n", + "\n", + " for i in range(layer_num):\n", + " teacher = teacher_probs[i][0]\n", + " student = student_probs[i][0]\n", + "\n", + " head_sum = 0\n", + " for h in range(head_num):\n", + " coverage_head_sum = 0\n", + " for row in range(seq_length-1):\n", + " if exclude_sep:\n", + " tc_argsort = teacher[h][:seq_length-1,:seq_length-1].sort(descending=True)[1][row][:top_k] # top-k\n", + " st_argsort = 
student[h][:seq_length-1,:seq_length-1].sort(descending=True)[1][row]\n", + " tc_argsort = teacher[h].sort(descending=True)[1][row][:top_k] # top-k\n", + " st_argsort = student[h].sort(descending=True)[1][row]\n", + "\n", + " max_idx = 0\n", + " for idx in tc_argsort:\n", + " tmp = torch.where(st_argsort == idx)\n", + " max_idx = max(tmp[0].item(), max_idx)\n", + "\n", + " coverage_ratio = max_idx / student.shape[1]\n", + " coverage_head_sum += coverage_ratio\n", + "\n", + " # print(f\"H{h} : {coverage_head_sum/seq_length}\")\n", + "\n", + " head_sum += coverage_head_sum / seq_length\n", + " print((head_sum / head_num).item())\n", + "\n", + "if kl_div_check:\n", + " print(\"KL DIV CHECK\")\n", + " for i in range(layer_num):\n", + " if exclude_sep:\n", + " if len(sep_index) == 2:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000; teacher_atts[i][:,:,:,sep_index[1]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000; student_atts[i][:,:,:,sep_index[1]] = -100000\n", + " else:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000\n", + " \n", + " teacher = torch.nn.Softmax(dim=-1)(teacher_atts[i])\n", + " student = torch.nn.Softmax(dim=-1)(student_atts[i])\n", + " \n", + " student = torch.clamp_min(student, 1e-8)\n", + " teacher = torch.clamp_min(teacher, 1e-8)\n", + " else: \n", + " teacher = teacher_probs[i]\n", + " student = student_probs[i]\n", + " \n", + " neg_cross_entropy = teacher * torch.log(student) \n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " # p(t) log p(t) = negative entropy\n", + " neg_entropy = teacher * torch.log(teacher) \n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + 
"\n", + " kld_loss = neg_entropy - neg_cross_entropy\n", + "\n", + " kld_loss_sum = torch.sum(kld_loss)\n", + " print(kld_loss_sum.item())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1276, + "id": "81c66000", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "device(type='cuda', index=0)" + ] + }, + "execution_count": 1276, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "markdown", + "id": "e75aa306", + "metadata": {}, + "source": [ + "# Per Layer Comp" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "1241c853", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "06/21 03:10:07 AM Loading model models/BERT_large/sst-2/pytorch_model.bin\n", + "06/21 03:10:07 AM loading model...\n", + "06/21 03:10:07 AM done!\n", + "06/21 03:10:07 AM loading configuration file output/BERT_large/sst-2/exploration/1SB_O/config.json\n", + "06/21 03:10:12 AM Loading model models/BERT_large/sst-2/pytorch_model.bin\n", + "06/21 03:10:13 AM loading model...\n", + "06/21 03:10:13 AM done!\n", + "\n", + "06/21 03:10:14 AM loading configuration file output/BERT_large/sst-2/exploration/1SB_O/config.json\n", + "06/21 03:10:19 AM Loading model models/BERT_large/sst-2/pytorch_model.bin\n", + "06/21 03:10:19 AM loading model...\n", + "06/21 03:10:19 AM done!\n", + "\n" + ] + } + ], + "source": [ + "cos_func = torch.nn.CosineSimilarity(dim=-1)\n", + "mag_dict = dict()\n", + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "mse_func = MSELoss()\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "first = 
True\n", + "for st_model_name in [\"1SB_O\", \"1SB_O\"]:\n", + " if first:\n", + " teacher_attnmap = True\n", + " num = \"TI\"\n", + " else:\n", + " teacher_attnmap = False\n", + " num = \"q\"\n", + " \n", + " student_model_dir = os.path.join(output_dir, task_name, \"exploration\", st_model_name) \n", + " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + " if first:\n", + " # Teacher Model Build\n", + " teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + " teacher_model.to(device)\n", + " teacher_model.eval()\n", + " model = teacher_model\n", + "\n", + "\n", + " # Student Model Build\n", + " student_config = BertConfig.from_pretrained(student_model_dir,\n", + " quantize_act=True,\n", + " quantize_weight=True,\n", + " weight_bits = 2, # Always Ternary when \"quantize_weight = True\"\n", + " input_bits = 8,\n", + " clip_val = 2.5,\n", + " quantize = True,\n", + " ffn_q_1 = True,\n", + " ffn_q_2 = True,\n", + " qkv_q = True,\n", + " emb_q = True,\n", + " cls_q = True,\n", + " clipping = False,\n", + " layer_num = -1,\n", + " mean_scale = 0.7,\n", + " quantizer = \"ternary\",\n", + " act_quantizer = \"ternary\",\n", + " init_scaling = 1,\n", + " clip_ratio = 1,\n", + " gradient_scaling = False,\n", + " clip_method = \"minmax\",\n", + " teacher_attnmap = teacher_attnmap, # CHANGE\n", + " parks = False,\n", + " stop_grad = False,\n", + " qk_FP = False,\n", + " map=False,\n", + " act_method = \"clipping\"\n", + " )\n", + "\n", + " student_model = QuantBertForSequenceClassification.from_pretrained(teacher_model_dir, config = student_config, num_labels=num_labels)\n", + " student_model.to(device)\n", + " model = student_model\n", + " print() \n", + " \n", + " mag_dict[f\"{num}_ffn_mse\"] = []; mag_dict[f\"{num}_attn_cos\"] = []; mag_dict[f\"{num}_attn_mse\"] = []; mag_dict[f\"{num}_ffn_cos\"] = []\n", + " \n", + " student_model.eval()\n", + " teacher_model.eval()\n", + " 
student_model.to(device)\n", + " teacher_model.to(device)\n", + " \n", + " teacher_outputs = teacher_model(input_ids_sliced.to(device))\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_zip = teacher_outputs\n", + " \n", + " student_logits, student_atts, student_reps, student_probs, student_zip = model(input_ids_sliced.to(device), teacher_outputs=teacher_outputs)\n", + " for l in range(layer_num):\n", + " tc_attn_context, tc_attn_output, tc_value_vector, tc_sa_output = teacher_zip[l]\n", + " st_attn_context, st_attn_output, st_value_vector, st_sa_output = student_zip[l] \n", + " st_ffn_output = student_reps[1+1]\n", + " tc_ffn_output = teacher_reps[l+1]\n", + " \n", + " # for token in range(len(tokens)):\n", + " tc_attn_output = tc_attn_context\n", + " st_attn_output = st_attn_context\n", + " \n", + " mse_attn_diff = mse_func(st_attn_output[0,:,:], tc_attn_output[0,:,:]).item()\n", + " cos_attn_diff = torch.mean((1-cos_func(st_attn_output[0,:,:], tc_attn_output[0,:,:]))).item()\n", + " mag_dict[f\"{num}_attn_mse\"].append(mse_attn_diff)\n", + " mag_dict[f\"{num}_attn_cos\"].append(cos_attn_diff)\n", + "\n", + " mse_ffn_diff = mse_func(student_reps[l+1][0,:,:], teacher_reps[l+1][0,:,:]).item()\n", + " cos_ffn_diff = torch.mean((1-cos_func(student_reps[l+1][0,:,:], teacher_reps[l+1][0,:,:]))).item()\n", + " mag_dict[f\"{num}_ffn_mse\"].append(mse_ffn_diff)\n", + " mag_dict[f\"{num}_ffn_cos\"].append(cos_ffn_diff)\n", + "# mag_dict[f\"{model_num}_mean_{l}\"].append(st_output[0,token,:].mean().item())\n", + "# mag_dict[f\"{model_num}_std_{l}\"].append(st_output[0,token,:].std().item())\n", + " first = False\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "f10e548b", + "metadata": {}, + "source": [ + "# Attention Output Comp Min-Max\n" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "ff15898f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'output/sst-2/exploration/1SB_M'" + ] + }, 
+ "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_model_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "6ac37737", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "06/15 04:22:44 AM Loading model models/rte/pytorch_model.bin\n", + "06/15 04:22:44 AM loading model...\n", + "06/15 04:22:44 AM done!\n", + "06/15 04:22:44 AM loading configuration file output/rte/exploration/1SB_O/config.json\n", + "06/15 04:22:46 AM Loading model output/rte/exploration/1SB_O/pytorch_model.bin\n", + "06/15 04:22:46 AM loading model...\n", + "06/15 04:22:46 AM done!\n" + ] + } + ], + "source": [ + "if teacher_model is None:\n", + " teacher_model_dir = os.path.join(model_dir,task_name)\n", + " teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + " teacher_model.to(device)\n", + " teacher_model.eval()\n", + "\n", + "st_model_name = \"1SB_O\"\n", + "student_model_dir = os.path.join(output_dir, task_name, \"exploration\", st_model_name) \n", + "student_config = BertConfig.from_pretrained(student_model_dir) \n", + "student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config, num_labels=num_labels)\n", + "student_model.to(device)\n", + "model = student_model\n", + "\n", + "teacher_outputs = teacher_model(input_ids_sliced.to(device))\n", + "student_outputs = model(input_ids_sliced.to(device), teacher_outputs=None)\n", + "\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_zip = teacher_outputs\n", + "student_logits, student_atts, student_reps, student_probs, student_zip = student_outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "13f0b734", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([ 0, 21, 4, 19, 9], device='cuda:0')\n", + "tensor([ 0, 
34, 27, 20, 11], device='cuda:0')\n", + "tensor([ 0, 35, 28, 27, 34], device='cuda:0')\n", + "tensor([28, 35, 0, 11, 26], device='cuda:0')\n", + "tensor([28, 35, 0, 10, 30], device='cuda:0')\n", + "tensor([28, 35, 13, 5, 1], device='cuda:0')\n", + "tensor([28, 35, 5, 21, 13], device='cuda:0')\n", + "tensor([35, 28, 21, 12, 13], device='cuda:0')\n", + "tensor([28, 35, 9, 34, 27], device='cuda:0')\n", + "tensor([28, 35, 21, 34, 27], device='cuda:0')\n", + "tensor([27, 34, 35, 28, 31], device='cuda:0')\n", + "tensor([27, 34, 35, 28, 33], device='cuda:0')\n" + ] + } + ], + "source": [ + "for l in range(layer_num):\n", + " token_avg_tc = teacher_probs[l][0,3,:,:].mean(dim=0)\n", + " print(torch.sort(token_avg_tc, descending=True)[1][:5])" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "3645f7b9", + "metadata": {}, + "outputs": [], + "source": [ + "token_avg_tc = teacher_probs[-2][0,3,:,:].mean(dim=0)\n", + "outlier_index = torch.sort(token_avg_tc, descending=True)[1][:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "d1447f6e", + "metadata": {}, + "outputs": [], + "source": [ + "cls_index = 0\n", + "\n", + "teacher_list = dict()\n", + "student_list = dict()\n", + "\n", + "teacher_list[\"cls\"] = []\n", + "student_list[\"cls\"] = []\n", + "\n", + "for i, outlier in enumerate(outlier_index):\n", + " teacher_list[f\"ol_{i}\"] = []\n", + " student_list[f\"ol_{i}\"] = []\n", + "\n", + "for i, punc in enumerate(punc_index_1):\n", + " teacher_list[f\"comma_{i}\"] = []\n", + " student_list[f\"comma_{i}\"] = []\n", + " \n", + "for i, punc in enumerate(punc_index_2):\n", + " teacher_list[f\"period_{i}\"] = []\n", + " student_list[f\"period_{i}\"] = []\n", + "\n", + "for i , sep in enumerate(sep_index):\n", + " teacher_list[f\"sep_{i}\"] = []\n", + " student_list[f\"sep_{i}\"] = []\n", + "\n", + "# Order\n", + "seq_length = len(tokens) - 1\n", + "# layer_num = 12\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", 
+ " token_avg_tc = teacher_probs[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_tc = torch.sort(token_avg_tc, stable=True)[1].clone().detach()\n", + " \n", + " ratio = torch.where(token_order_tc == cls_index)[0] / seq_length\n", + " teacher_list[f\"cls\"].append(ratio.item())\n", + " \n", + " for i, ol in enumerate(outlier_index):\n", + " ratio = torch.where(token_order_tc == ol)[0] / seq_length\n", + " teacher_list[f\"ol_{i}\"].append(ratio.item())\n", + " \n", + " for i, sep in enumerate(sep_index):\n", + " ratio = torch.where(token_order_tc == sep)[0] / seq_length\n", + " teacher_list[f\"sep_{i}\"].append(ratio.item())\n", + " \n", + " for i, punc in enumerate(punc_index_1):\n", + " ratio = torch.where(token_order_tc == punc)[0] / seq_length\n", + " teacher_list[f\"comma_{i}\"].append(ratio.item())\n", + " \n", + " for i, punc in enumerate(punc_index_2):\n", + " ratio = torch.where(token_order_tc == punc)[0] / seq_length\n", + " teacher_list[f\"period_{i}\"].append(ratio.item())\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " token_avg_st = student_probs[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_st = torch.sort(token_avg_st, stable=True)[1].clone().detach()\n", + " \n", + " ratio = torch.where(token_order_st == cls_index)[0] / seq_length\n", + " student_list[f\"cls\"].append(ratio.item())\n", + " \n", + " for i, ol in enumerate(outlier_index):\n", + " ratio = torch.where(token_order_st == ol)[0] / seq_length\n", + " student_list[f\"ol_{i}\"].append(ratio.item())\n", + " \n", + " for i, sep in enumerate(sep_index):\n", + " ratio = torch.where(token_order_st == sep)[0] / seq_length\n", + " student_list[f\"sep_{i}\"].append(ratio.item())\n", + " \n", + " for i, punc in enumerate(punc_index_1):\n", + " ratio = torch.where(token_order_st == punc)[0] / seq_length\n", + " student_list[f\"comma_{i}\"].append(ratio.item())\n", + " \n", + " for i, punc in enumerate(punc_index_2):\n", + " ratio = 
torch.where(token_order_st == punc)[0] / seq_length\n", + " student_list[f\"period_{i}\"].append(ratio.item()) \n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "id": "afb6e60a", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "for dict_list in [teacher_list, student_list]:\n", + " fig, [ax1,ax2] = plt.subplots(2, 1, figsize=(14, 8), dpi=70)\n", + " \n", + " # fig, ax1 = plt.subplots(1, 1, figsize=(14, 5), dpi=70)\n", + " color_list = [\"b\", \"c\", \"tab:blue\", \"tab:orange\", \"crimson\", \"red\"]\n", + " punc_name = [\"comma\", \"comma\", \"comma\", \"period\", \"period\"]\n", + " # ax.plot(list(range(layer_num*head_num)), dict_list[\"sep_0\"], label=\"sep_0\", color='gray')\n", + " # ax.plot(list(range(layer_num*head_num)), dict_list[\"sep_1\"], label=\"sep_1\", color='gray')\n", + " # ax.plot(list(range(layer_num*head_num)), dict_list[\"cls\"], label=\"cls\", color='tab:brown')\n", + " \n", + " for i, ol in enumerate(outlier_index):\n", + " ax1.plot(list(range(layer_num*head_num)), dict_list[f\"ol_{i}\"], label=f\"index-{ol}-{tokens[ol]}\", linewidth=2.5)\n", + " for i, punc in enumerate(punc_index_1):\n", + " ax2.plot(list(range(layer_num*head_num)), dict_list[f\"comma_{i}\"], label=f\"comma{i}-{punc}\", linewidth=2.5)\n", + " for i, punc in enumerate(punc_index_2):\n", + " ax2.plot(list(range(layer_num*head_num)), dict_list[f\"period_{i}\"], label=f\"period{i}-{punc}\", linewidth=2.5)\n", + " \n", + " ax1.legend(fontsize=15, loc=3)\n", + " ax2.legend(fontsize=15, loc=3)\n", + " ax1.set_xlim([108,144])\n", + " ax2.set_xlim([108,144])\n", + " \n", + " for l in range(layer_num):\n", + " ax1.axvline(x=l*12, color=\"lightgray\")\n", + " ax2.axvline(x=l*12, color=\"lightgray\")\n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 287, + "id": "bd76f529", + "metadata": {}, + "outputs": [], + "source": [ + "teacher_list = None\n", + "student_list = None\n", + "student_model = None\n", + "student_outputs = None\n", + "teacher_outputs = None\n", + "with torch.no_grad():\n", + " torch.cuda.empty_cache()" + ] + }, + { + "cell_type": "markdown", + 
"id": "5152fbe1", + "metadata": {}, + "source": [ + "# Norm Based Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "e926f233", + "metadata": {}, + "outputs": [], + "source": [ + "norm_func = torch.linalg.norm\n", + "layer_num = 6\n", + "add = 18\n", + "table_tc_prob = [[0] * head_num for i in range(layer_num)]\n", + "table_st_prob = [[0] * head_num for i in range(layer_num)]\n", + "table_diff_prob = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "table_tc_fx = [[0] * head_num for i in range(layer_num)]\n", + "table_st_fx = [[0] * head_num for i in range(layer_num)]\n", + "table_diff_fx = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "table_tc_afx = [[0] * head_num for i in range(layer_num)]\n", + "table_st_afx = [[0] * head_num for i in range(layer_num)]\n", + "table_diff_afx = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "token_index = 54# punc_index[0] # outlier_index[0]\n", + "\n", + "for l in range(layer_num):\n", + " \n", + " tc_prob = teacher_probs[l+add]\n", + " st_prob = student_probs[l+add]\n", + " \n", + " tc_context, tc_output, tc_value, tc_output_hs, tc_norm = teacher_zips[l+add]\n", + " st_context, st_output, st_value, st_output_hs, st_norm = student_zips[l+add]\n", + " \n", + " tc_transformed_norm, tc_weighted_norm, tc_summed_weighted_norm = tc_norm\n", + " st_transformed_norm, st_weighted_norm, st_summed_weighted_norm = st_norm\n", + " \n", + " \n", + " for h in range(head_num):\n", + " table_tc_prob[l][h] = tc_prob[:,h,:,token_index].mean().item()\n", + " table_st_prob[l][h] = st_prob[:,h,:,token_index].mean().item()\n", + " table_diff_prob[l][h] = (tc_prob[:,h,token_index].mean() - st_prob[:,h,token_index].mean()).abs().item()\n", + " \n", + " table_tc_fx[l][h] = norm_func(tc_transformed_norm[:,h,token_index], dim=-1).item()\n", + " table_st_fx[l][h] = norm_func(st_transformed_norm[:,h,token_index], dim=-1).item()\n", + " table_diff_fx[l][h] = 
(norm_func(tc_transformed_norm[:,h,token_index], dim=-1) - norm_func(st_transformed_norm[:,h,token_index], dim=-1)).abs().item()\n", + " \n", + " table_tc_afx[l][h] = norm_func(tc_weighted_norm[:,h,token_index], dim=-1).item()\n", + " table_st_afx[l][h] = norm_func(st_weighted_norm[:,h,token_index], dim=-1).item()\n", + " table_diff_afx[l][h] = (norm_func(tc_weighted_norm[:,h,token_index,:], dim=-1) - norm_func(st_weighted_norm[:,h,token_index,:], dim=-1)).abs().item()\n", + " \n", + " \n", + "\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c970a765", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'plt' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [2]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m fig, [ax1,ax2,ax3] \u001b[38;5;241m=\u001b[39m \u001b[43mplt\u001b[49m\u001b[38;5;241m.\u001b[39msubplots(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m3\u001b[39m, figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m32\u001b[39m,\u001b[38;5;241m8\u001b[39m))\n\u001b[1;32m 3\u001b[0m ax1\u001b[38;5;241m.\u001b[39mset_xlabel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhead\u001b[39m\u001b[38;5;124m\"\u001b[39m, fontsize\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m); ax1\u001b[38;5;241m.\u001b[39mset_ylabel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlayer\u001b[39m\u001b[38;5;124m\"\u001b[39m, fontsize\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m)\n\u001b[1;32m 4\u001b[0m heatmap\u001b[38;5;241m=\u001b[39max1\u001b[38;5;241m.\u001b[39mpcolor(table_tc_prob, cmap\u001b[38;5;241m=\u001b[39mplt\u001b[38;5;241m.\u001b[39mcm\u001b[38;5;241m.\u001b[39mBlues)\n", + "\u001b[0;31mNameError\u001b[0m: name 'plt' is not defined" + ] + } 
+ ], + "source": [ + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "\n", + "ax1.set_xlabel(\"head\", fontsize=20); ax1.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax1.pcolor(table_tc_prob, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax1)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax1.set_title(\"TC PROB\", fontsize=25)\n", + "\n", + "ax2.set_xlabel(\"head\", fontsize=20); ax2.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax2.pcolor(table_tc_fx, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax2)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax2.set_title(\"TC || f(x) ||\", fontsize=25)\n", + "\n", + "ax3.set_xlabel(\"head\", fontsize=20); ax3.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax3.pcolor(table_tc_afx, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax3)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax3.set_title(\"TC || af(x) ||\", fontsize=25)\n", + " \n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "\n", + "ax1.set_xlabel(\"head\", fontsize=20); ax1.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax1.pcolor(table_st_prob, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax1)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax1.set_title(\"ST PROB\", fontsize=25)\n", + "\n", + "ax2.set_xlabel(\"head\", fontsize=20); ax2.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax2.pcolor(table_st_fx, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax2)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax2.set_title(\"ST || f(x) ||\", fontsize=25)\n", + "\n", + "ax3.set_xlabel(\"head\", fontsize=20); ax3.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax3.pcolor(table_st_afx, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax3)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax3.set_title(\"ST || af(x) ||\", fontsize=25)\n", + "\n", + "\n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "\n", + "ax1.set_xlabel(\"head\", fontsize=20); 
ax1.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax1.pcolor(table_diff_prob, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax1)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax1.set_title(\"DIFF PROB\", fontsize=25)\n", + "\n", + "ax2.set_xlabel(\"head\", fontsize=20); ax2.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax2.pcolor(table_diff_fx, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax2)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax2.set_title(\"DIFF || f(x) ||\", fontsize=25)\n", + "\n", + "ax3.set_xlabel(\"head\", fontsize=20); ax3.set_ylabel(\"layer\", fontsize=20)\n", + "heatmap=ax3.pcolor(table_diff_afx, cmap=plt.cm.Blues)\n", + "cb = fig.colorbar(heatmap, ax=ax3)\n", + "cb.ax.tick_params(labelsize=20)\n", + "ax3.set_title(\"DIFF || af(x) ||\", fontsize=25)\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 415, + "id": "35336d97", + "metadata": {}, + "outputs": [], + "source": [ + "# Avg Attention\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " token_avg_tc = teacher_probs[l][0,h,:,:].mean(dim=0)\n", + " token_avg_st = student_probs[l][0,h,:,:].mean(dim=0)\n", + " \n", + " # Logging\n", + " for i, sep in enumerate(sep_index):\n", + " teacher_list[f\"sep_{i}\"].append(token_avg_tc[sep].item())\n", + " student_list[f\"sep_{i}\"].append(token_avg_st[sep].item())\n", + " \n", + " for i, punc in enumerate(punc_index):\n", + " teacher_list[f\"punc_{i}\"].append(token_avg_tc[punc].item())\n", + " student_list[f\"punc_{i}\"].append(token_avg_st[punc].item())\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "6261fa56", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([1, 16, 68, 68])" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tc_weighted_norm.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a47e4e94", + "metadata": {}, 
+ "outputs": [], + "source": [ + "# This is for Output Save Code\n", + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "mse_func = MSELoss()\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "tensor_dir = f\"tensor_files/{task_name}\"\n", + "if not os.path.exists(tensor_dir):\n", + " os.mkdir(tensor_dir)\n", + " \n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "# Teacher Model Build\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n", + "teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + "teacher_model.to(device)\n", + "teacher_model.eval()\n", + "\n", + "teacher_outputs = teacher_model(input_ids_sliced.to(device))\n", + "torch.save(teacher_outputs, os.path.join(tensor_dir, f\"teacher_outputs.pt\"))\n", + "\n", + "# Student Model Build\n", + "name_1 = \"1SB_O\"\n", + "name_2 = \"1SB_M\"\n", + "model_list = [name_1, name_2]\n", + "\n", + "for st_model_name in model_list:\n", + " \n", + " student_model_dir = os.path.join(output_dir, task_name, \"exploration\", st_model_name) \n", + " student_config = BertConfig.from_pretrained(student_model_dir) \n", + " student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config, num_labels=num_labels)\n", + " student_model.to(device)\n", + " student_model.eval()\n", + "\n", + " student_outputs = student_model(input_ids_sliced.to(device), teacher_outputs=None)\n", + " torch.save(student_outputs, os.path.join(tensor_dir, f\"{st_model_name}_student_outputs.pt\"))\n", + " \n", + " \n", + "file_name = \"1SB_M\"\n", + "\n", + "# Load\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_zip = 
torch.load(f\"tensor_files/{task_name}/teacher_outputs.pt\")\n", + "student_logits, student_atts, student_reps, student_probs, student_zip = torch.load(f\"tensor_files/{task_name}/{file_name}_student_outputs.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07df970e", + "metadata": {}, + "outputs": [], + "source": [ + "norm_func = torch.linalg.norm\n", + "layer_num = 6\n", + "add = 6\n", + "table_tc_prob = [[0] * head_num for i in range(layer_num)]\n", + "table_st_prob = [[0] * head_num for i in range(layer_num)]\n", + "table_diff_prob = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "table_tc_value = [[0] * head_num for i in range(layer_num)]\n", + "table_st_value = [[0] * head_num for i in range(layer_num)]\n", + "table_diff_value = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "table_tc_context = [[0] * head_num for i in range(layer_num)]\n", + "table_st_context = [[0] * head_num for i in range(layer_num)]\n", + "table_diff_context = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "table_tc_output = [[0] for i in range(layer_num)]\n", + "table_st_output = [[0] for i in range(layer_num)]\n", + "table_diff_output = [[0] for i in range(layer_num)]\n", + "\n", + "token_index = 0 # outlier_index[0]\n", + "\n", + "for l in range(layer_num):\n", + " \n", + " tc_prob = teacher_probs[l+add]\n", + " st_prob = student_probs[l+add]\n", + " \n", + " tc_context, tc_output, tc_value, tc_output_hs, tc_norm = teacher_zips[l+add]\n", + " st_context, st_output, st_value, st_output_hs, st_norm = student_zips[l+add]\n", + " \n", + " tc_tranformed_norm, tc_weighted_norm, tc_summed_weighted_norm = tc_norm\n", + " st_tranformed_norm, st_weighted_norm, st_summed_weighted_norm = st_norm\n", + " \n", + " \n", + " for h in range(head_num):\n", + " table_tc_prob[l][h] = tc_prob[:,h,:,token_index].mean().item()\n", + " table_st_prob[l][h] = st_prob[:,h,:,token_index].mean().item()\n", + " table_diff_prob[l][h] = 
(tc_prob[:,h,:,token_index].mean() - st_prob[:,h,:,token_index].mean()).abs().item()\n", + " \n", + " table_tc_value[l][h] = norm_func(tc_value[:,h,token_index,:], dim=-1).item()\n", + " table_st_value[l][h] = norm_func(st_value[:,h,token_index,:], dim=-1).item()\n", + " table_diff_value[l][h] = (norm_func(tc_value[:,h,token_index,:], dim=-1) - norm_func(st_value[:,h,token_index,:], dim=-1)).abs().item()\n", + " \n", + " table_tc_context[l][h] = norm_func(tc_context[:,h,token_index,:], dim=-1).item()\n", + " table_st_context[l][h] = norm_func(st_context[:,h,token_index,:], dim=-1).item()\n", + " table_diff_context[l][h] = (norm_func(tc_context[:,h,token_index,:], dim=-1) - norm_func(st_context[:,h,token_index,:], dim=-1)).abs().item()\n", + " \n", + " table_tc_output[l] = norm_func(tc_output[:,token_index,:], dim=-1).item()\n", + " table_st_output[l] = norm_func(st_output[:,token_index,:], dim=-1).item()\n", + " table_diff_output[l] = (norm_func(tc_output[:,token_index,:], dim=-1) - norm_func(st_output[:,token_index,:], dim=-1)).abs().item()\n", + " \n", + " \n", + "\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4c8e862", + "metadata": {}, + "outputs": [], + "source": [ + "kl_loss = torch.nn.KLDivLoss(reduction=\"batchmean\")\n", + "mse_func = MSELoss()\n", + "\n", + "map_diff_map = []\n", + "map_diff_output = []\n", + "\n", + "value_diff_map = []\n", + "value_diff_output = []\n", + "\n", + "context_diff_map = []\n", + "context_diff_output = []\n", + "\n", + "output_diff_map = []\n", + "output_diff_output = []\n", + "\n", + "ffn_diff_map = []\n", + "ffn_diff_output = []\n", + "\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n", + "teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + "teacher_model.to(device)\n", + "teacher_model.eval()\n", + "\n", + "teacher_outputs = teacher_model(input_ids_sliced.to(device))\n", + "teacher_logits, teacher_atts, 
teacher_reps, teacher_probs, teacher_zip = teacher_outputs\n", + "\n", + "for st_model_name in [\"1SB_O\", \"1SB_M_O\"]:\n", + " student_model_dir = os.path.join(output_dir, task_name, \"exploration\", st_model_name) \n", + " student_config = BertConfig.from_pretrained(student_model_dir) \n", + " student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config, num_labels=num_labels)\n", + " student_model.to(device)\n", + " \n", + " student_outputs = student_model(input_ids_sliced.to(device), teacher_outputs=None)\n", + " student_logits, student_atts, student_reps, student_probs, student_zip = student_outputs\n", + " \n", + " for l in range(layer_num):\n", + " tc_prob = teacher_probs[l]\n", + " st_prob = student_probs[l]\n", + " \n", + " tc_ffn = teacher_reps[l]\n", + " st_ffn = student_reps[l]\n", + " \n", + " tc_context, tc_output, tc_value, tc_output_hs, tc_norm = teacher_zip[l]\n", + " st_context, st_output, st_value, st_output_hs, st_norm = student_zip[l]\n", + "\n", + " tc_transformed_norm, tc_weighted_norm, tc_summed_weighted_norm = tc_norm\n", + " st_transformed_norm, st_weighted_norm, st_summed_weighted_norm = st_norm\n", + "\n", + " ffn_diff = mse_func(tc_ffn[0,:,:], st_ffn[0,:,:])\n", + " output_diff = mse_func(tc_value[0,:,:], st_value[0,:,:])\n", + " \n", + " if \"1SB_O\" == st_model_name:\n", + " ffn_diff_output.append(ffn_diff.item())\n", + " output_diff_output.append(output_diff.item())\n", + " else:\n", + " ffn_diff_map.append(ffn_diff.item())\n", + " output_diff_map.append(output_diff.item())\n", + " \n", + " for h in range(head_num):\n", + " map_diff = kl_loss(st_prob[0,h,:,:].log(), tc_prob[0,h,:,:])\n", + " value_diff = mse_func(tc_value[0,h,:,:], st_value[0,h,:,:])\n", + " context_diff = mse_func(tc_context[0,h,:,:], st_context[0,h,:,:])\n", + " \n", + " if \"1SB_O\" == st_model_name:\n", + " map_diff_output.append(map_diff.item())\n", + " value_diff_output.append(value_diff.item())\n", + " 
context_diff_output.append(context_diff.item())\n", + " else:\n", + " map_diff_map.append(map_diff.item())\n", + " value_diff_map.append(value_diff.item())\n", + " context_diff_map.append(context_diff.item())\n", + " \n", + "fig, [ax1,ax2,ax3] = plt.subplots(3, 1, figsize=(14, 12), dpi=70)\n", + "x_len = layer_num*head_num\n", + "ax1.plot(list(range(x_len)),map_diff_map, linewidth=2.5 )\n", + "ax1.plot(list(range(x_len)),map_diff_output, linewidth=2.5 )\n", + "\n", + "ax2.plot(list(range(x_len)),value_diff_map, linewidth=2.5 )\n", + "ax2.plot(list(range(x_len)),value_diff_output, linewidth=2.5 )\n", + "\n", + "ax3.plot(list(range(x_len)),context_diff_map, linewidth=2.5 )\n", + "ax3.plot(list(range(x_len)),context_diff_output, linewidth=2.5 )\n", + "\n", + "fig, [ax1,ax2] = plt.subplots(2, 1, figsize=(14, 8), dpi=70)\n", + "x_len = layer_num\n", + "ax1.plot(list(range(x_len)),ffn_diff_map, linewidth=2.5 )\n", + "ax1.plot(list(range(x_len)),ffn_diff_output, linewidth=2.5 )\n", + "\n", + "ax2.plot(list(range(x_len)),output_diff_map, linewidth=2.5 )\n", + "ax2.plot(list(range(x_len)),output_diff_output, linewidth=2.5 )\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ce5207d", + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 5), dpi=70)\n", + "i = 0\n", + "fs = 15\n", + "# for a in [prob_other_diff, prob_cls_diff, prob_sep_diff, prob_punc_diff]:\n", + "# label = [\"Output\", \"Map\"]\n", + "for a in [diff_dict[\"0_prob_\"]]:\n", + " plt.plot(list(range(layer_num)), a, label=label[i], linewidth=2.5)\n", + " i += 1\n", + " \n", + "plt.xlabel(\"Layer\", fontsize=fs)\n", + "plt.ylabel(\"Avg. 
$α$\", fontsize=fs)\n", + "plt.title(\"Average attention weight\", fontsize=fs)\n", + "plt.legend()\n", + "plt.show() \n", + "\n", + "plt.figure(figsize=(10, 5), dpi=70)\n", + "i = 0\n", + "# for a in [tr_other_diff, tr_cls_diff, tr_sep_diff, tr_punc_diff]:\n", + "for a in [tr_punc_diff]:\n", + " plt.plot(list(range(layer_num)), a, label=label[i], linewidth=2.5)\n", + " i += 1\n", + " \n", + "plt.xlabel(\"Layer\", fontsize=fs)\n", + "plt.ylabel(\"Avg. ||f(x)||\", fontsize=fs)\n", + "plt.title(\"Average Norm || f(x)||\", fontsize=fs)\n", + "plt.legend()\n", + "plt.show() \n", + "\n", + "plt.figure(figsize=(10, 5), dpi=70)\n", + "i = 0\n", + "# for a in [wt_other_diff, wt_cls_diff, wt_sep_diff, wt_punc_diff]:\n", + "for a in [wt_punc_diff]:\n", + " plt.plot(list(range(layer_num)), a, label=label[i], linewidth=2.5)\n", + " i += 1\n", + " \n", + "plt.xlabel(\"Layer\", fontsize=fs)\n", + "plt.ylabel(\"Avg. || $α$ f(x) ||\", fontsize=fs)\n", + "plt.title(\"Average Norm || $α$ f(x) ||\", fontsize=fs)\n", + "plt.legend()\n", + "plt.show() \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f1880ad", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51233774", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59d72d08", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/BERTviz-base.ipynb b/notebooks/BERTviz-base.ipynb new file mode 100644 
index 0000000..5b62e5b --- /dev/null +++ b/notebooks/BERTviz-base.ipynb @@ -0,0 +1,1283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 37, + "id": "9f99d9c2", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import absolute_import, division, print_function\n", + "\n", + "import pprint\n", + "import argparse\n", + "import logging\n", + "import os\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"3\" # Set GPU Index to use\n", + "os.environ['CUDA_LAUNCH_BLOCKING'] = \"1\"\n", + "import random\n", + "import sys\n", + "import pickle\n", + "import copy\n", + "import collections\n", + "import math\n", + "\n", + "import numpy as np\n", + "import numpy\n", + "import torch\n", + "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler,TensorDataset\n", + "# from torch.utils.tensorboard import SummaryWriter\n", + "\n", + "from torch.nn import CrossEntropyLoss, MSELoss\n", + "from tqdm import tqdm\n", + "from transformer import BertForSequenceClassification,WEIGHTS_NAME, CONFIG_NAME\n", + "from transformer.modeling_quant import BertForSequenceClassification as QuantBertForSequenceClassification\n", + "from transformer import BertTokenizer\n", + "from transformer import BertAdam\n", + "from transformer import BertConfig\n", + "from transformer import QuantizeLinear, QuantizeAct, BertSelfAttention, FP_BertSelfAttention, ClipLinear\n", + "from utils_glue import *\n", + "from bertviz import model_view\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import torch.nn.functional as F\n", + "\n", + "class AverageMeter(object):\n", + " \"\"\"Computes and stores the average and current value\"\"\"\n", + " def __init__(self):\n", + " self.reset()\n", + "\n", + " def reset(self):\n", + " self.val = 0\n", + " self.avg = 0 \n", + " self.sum = 0\n", + " self.count = 0\n", + "\n", + " def update(self, val, n=1):\n", + " self.val = val\n", + " self.sum += val * n\n", + " 
self.count += n\n", + " self.avg = self.sum / self.count\n", + "\n", + "def cv_initialize(model, loader, ratio, device):\n", + " \n", + " def initialize_hook(module, input, output):\n", + " if isinstance(module, (QuantizeLinear, QuantizeAct, ClipLinear)):\n", + " \"\"\"KDLSQ-BERT ACT Quant init Method\n", + " Ref: https://arxiv.org/abs/2101.05938\n", + " \"\"\"\n", + " if not isinstance(input, torch.Tensor):\n", + " input = input[0]\n", + " \n", + " n = torch.numel(input)\n", + " input_sorted, index = torch.sort(input.reshape(-1), descending=False)\n", + " \n", + " index_min = torch.round(ratio * n / 2)\n", + " index_max = n - index_min\n", + " \n", + " s_init = (input_sorted[int(index_min)].to(device), input_sorted[int(index_max)].to(device))\n", + " \n", + " # MATPLOT\n", + " \n", + " fig, [ax1, ax2, ax3] = plt.subplots(1,3, figsize=(16, 4)) \n", + " \n", + " sns.histplot(data=input.reshape(-1).detach().cpu().numpy(), kde = True, bins=100, ax=ax1)\n", + " sns.rugplot(data=input.reshape(-1).detach().cpu().numpy(), ax=ax1)\n", + " sns.histplot(data=module.weight.reshape(-1).detach().cpu().numpy(), kde = True, bins=100, ax=ax2)\n", + " sns.rugplot(data=module.weight.reshape(-1).detach().cpu().numpy(), ax=ax2)\n", + " sns.histplot(data=output.reshape(-1).detach().cpu().numpy(), kde = True, bins=100, ax=ax3)\n", + " sns.rugplot(data=output.reshape(-1).detach().cpu().numpy(), ax=ax3)\n", + " # fig, [ax1, ax2] = plt.subplots(1,2, figsize=(12, 4)) \n", + " \n", + " # sns.distplot(input.reshape(-1).detach().cpu().numpy() , hist = True, rug = True, kde = True, bins=100, norm_hist=False, kde_kws=dict(linewidth=0.5), rug_kws=dict(linewidth=0.5), ax=ax1)\n", + " # sns.distplot(output.reshape(-1).detach().cpu().numpy() , hist = True, rug = True, kde = True, bins=100, norm_hist=False, kde_kws=dict(linewidth=0.5), rug_kws=dict(linewidth=0.5), ax=ax2)\n", + " # # plt.axvline(x=s_init[0].detach().cpu().numpy(), color='r', linestyle='--')\n", + " # # 
plt.axvline(x=s_init[1].detach().cpu().numpy(), color='r', linestyle='--')\n", + "\n", + " ax1.set_xlabel(\"Input Activation\")\n", + " # ax2.set_xlabel(\"Output Activation\")\n", + " ax2.set_xlabel(\"Module Weight\")\n", + " ax3.set_xlabel(\"Output Activation\")\n", + " \n", + " ax1.set_ylabel(\"Density\")\n", + " ax2.set_ylabel(\"Density\")\n", + " ax3.set_ylabel(\"Density\")\n", + "\n", + " ax1.set_title(f\"{module.name} Input ACT histogram\")\n", + " # ax2.set_title(f\"{module.name} Output ACT histogram\")\n", + " ax2.set_title(f\"{module.name} Weight histogram\")\n", + " ax3.set_title(f\"{module.name} Output ACT histogram\")\n", + " # plt.savefig(f\"plt_storage/hook_inputs/sst-2-fp/{module.name}.png\")\n", + " plt.show()\n", + " plt.close(fig)\n", + " # module.clip_initialize(s_init)\n", + " # logger.info(f\"{module} : min {s_init[0].item()} max {s_init[1].item()}\") \n", + "\n", + " \n", + " hooks = []\n", + "\n", + " for name, module in model.named_modules():\n", + " hook = module.register_forward_hook(initialize_hook)\n", + " hooks.append(hook)\n", + " \n", + " model.train()\n", + " model.to(device)\n", + " \n", + " for step, batch in enumerate(loader):\n", + " batch = tuple(t.to(\"cuda\") for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch \n", + " with torch.no_grad():\n", + " student_logits, student_atts, student_reps, student_probs, student_values = model(input_ids, segment_ids, input_mask, teacher_probs=None)\n", + " break\n", + " \n", + " for hook in hooks:\n", + " hook.remove()\n", + "\n", + "def str2bool(v):\n", + " if isinstance(v, bool):\n", + " return v\n", + " if v.lower() in ('yes', 'true', 't', 'y', '1'):\n", + " return True\n", + " elif v.lower() in ('no', 'false', 'f', 'n', '0'):\n", + " return False\n", + " else:\n", + " raise argparse.ArgumentTypeError('Boolean value expected.')\n", + "\n", + "def load_vocab(vocab_file):\n", + " \"\"\"Loads a vocabulary file into a dictionary.\"\"\"\n", + " vocab = 
collections.OrderedDict()\n", + " index = 0\n", + " with open(vocab_file, \"r\", encoding=\"utf-8\") as reader:\n", + " while True:\n", + " token = reader.readline()\n", + " if not token:\n", + " break\n", + " token = token.strip()\n", + " #vocab[token] = index\n", + " vocab[index] = token\n", + " index += 1\n", + " return vocab\n", + "\n", + "def attention_pattern(model, loader, device):\n", + " \n", + " def initialize_hook(module, input, output):\n", + " if isinstance(module, BertSelfAttention):\n", + " \n", + " attn_mask = input[1]\n", + " attention_output = output[-2][\"attn\"]\n", + " \n", + " seq_length = (attn_mask == 0).sum()\n", + " \n", + " print(attention_output[0,:,:seq_length,seq_length-1].mean().item())\n", + " \n", + "\n", + " hooks = []\n", + "\n", + " for name, module in model.named_modules():\n", + " hook = module.register_forward_hook(initialize_hook)\n", + " hooks.append(hook)\n", + " \n", + " model.eval()\n", + " model.to(device)\n", + " \n", + " for step, batch in enumerate(loader):\n", + " batch = tuple(t.to(\"cuda\") for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch \n", + " with torch.no_grad():\n", + " student_logits, student_atts, student_reps, student_probs, student_values = model(input_ids, segment_ids, input_mask)\n", + " break\n", + " \n", + " for hook in hooks:\n", + " hook.remove()\n", + " \n", + "def get_tensor_data(output_mode, features):\n", + " if output_mode == \"classification\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)\n", + " elif output_mode == \"regression\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)\n", + "\n", + "\n", + " all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long)\n", + " all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)\n", + " all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)\n", + " 
all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)\n", + " tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,all_label_ids, all_seq_lengths)\n", + " return tensor_data, all_label_ids\n", + "\n", + "def do_logging(run, student_model, teacher_model, test_dataloader, device, global_step, args, vocab):\n", + " \n", + " if args.bert == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + " else:\n", + " layer_num = 12\n", + " head_num = 12\n", + " \n", + " nb_steps = 0\n", + " \n", + " kl_div_sum = [0 for i in range(layer_num)]\n", + " st_sep_avg_sum = [0 for i in range(layer_num)]; st_cls_avg_sum = [0 for i in range(layer_num)]; tc_sep_avg_sum = [0 for i in range(layer_num)]; tc_cls_avg_sum = [0 for i in range(layer_num)]\n", + " cover_sum = [0 for i in range(layer_num)]\n", + " cover_teacher_sum = [0 for i in range(layer_num)]\n", + " \n", + " batch_num = 0\n", + " \n", + " for batch_ in tqdm(test_dataloader, desc=\"Logging Test\", mininterval=0.01, ascii=True, leave=False):\n", + " batch_ = tuple(t.to(device) for t in batch_)\n", + " \n", + " if batch_num >= 1: # Visualize Attention Map only First Batch \n", + " args.log_map = False\n", + " \n", + " with torch.no_grad():\n", + " input_ids, input_mask, segment_ids, label_id, seq_length = batch_\n", + "\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids, segment_ids, input_mask)\n", + " student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids, segment_ids, input_mask, teacher_probs=teacher_probs)\n", + " \n", + " # Layer\n", + " for i, (student_prob, teacher_prob) in enumerate(zip(student_probs, teacher_probs)): \n", + "\n", + " # Head\n", + " for head in range(head_num):\n", + " \n", + " if args.log_map:\n", + " \n", + " word_list = []\n", + " \n", + " for word in range(seq_length):\n", + " word_list.append(vocab[input_ids[0][word].item()])\n", + " \n", 
+ " student_prob_map = student_prob[0][head][:seq_length,:seq_length].clone().detach().cpu().numpy()\n", + " teacher_prob_map = teacher_prob[0][head][:seq_length,:seq_length].clone().detach().cpu().numpy()\n", + " \n", + " fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(16,8))\n", + " ax1.set_title(f\"{i}th Layer {head}th Head Teacher\")\n", + " heatmap = ax1.pcolor(teacher_prob_map, cmap=plt.cm.Blues)\n", + " \n", + " ax1.set_xticks(numpy.arange(teacher_prob_map.shape[1]) + 0.5, minor=False)\n", + " ax1.set_yticks(numpy.arange(teacher_prob_map.shape[0]) + 0.5, minor=False)\n", + " \n", + " ax1.set_xlim(0, int(teacher_prob_map.shape[1]))\n", + " ax1.set_ylim(0, int(teacher_prob_map.shape[0]))\n", + "\n", + " ax1.invert_yaxis()\n", + " ax1.xaxis.tick_top()\n", + "\n", + " ax1.set_xticklabels(word_list, minor=False)\n", + " ax1.set_yticklabels(word_list, minor=False)\n", + "\n", + " plt.xticks(rotation=45)\n", + " \n", + " ax2.set_title(f\"{i}th Layer {head}th Head Student\")\n", + " heatmap = ax2.pcolor(student_prob_map, cmap=plt.cm.Blues)\n", + "\n", + " ax2.set_xticks(numpy.arange(student_prob_map.shape[1]) + 0.5, minor=False)\n", + " ax2.set_yticks(numpy.arange(student_prob_map.shape[0]) + 0.5, minor=False)\n", + "\n", + " ax2.set_xlim(0, int(student_prob_map.shape[1]))\n", + " ax2.set_ylim(0, int(student_prob_map.shape[0]))\n", + "\n", + " ax2.invert_yaxis()\n", + " ax2.xaxis.tick_top()\n", + "\n", + " ax2.set_xticklabels(word_list, minor=False)\n", + " ax2.set_yticklabels(word_list, minor=False)\n", + "\n", + " plt.xticks(rotation=45)\n", + " \n", + " plt_folder_name = os.path.join(\"plt_storage\" + \"/\" + args.exp_name)\n", + " if not os.path.exists(plt_folder_name):\n", + " os.mkdir(plt_folder_name) \n", + " plt_folder_name = os.path.join(plt_folder_name, f\"step_{global_step}\")\n", + " if not os.path.exists(plt_folder_name):\n", + " os.mkdir(plt_folder_name) \n", + " plt.savefig(plt_folder_name + \"/\" + f\"L{i}_H{head}.png\")\n", + " plt.close()\n", + " 
\n", + "\n", + " if args.log_metric:\n", + " \n", + " student_prob = student_prob\n", + " teacher_prob = teacher_prob\n", + "\n", + " # Attention Map\n", + " student_attn_map = student_prob[0][head][:seq_length,:seq_length].clone().detach()\n", + " teacher_attn_map = teacher_prob[0][head][:seq_length,:seq_length].clone().detach()\n", + "\n", + " # KL Divergence\n", + " kl_div = F.kl_div(student_attn_map.log(), teacher_attn_map, reduction='batchmean')\n", + " kl_div_sum[i] += kl_div\n", + "\n", + " # Special Token Prob Mean\n", + " st_sep_avg = student_attn_map[:,-1].mean()\n", + " st_cls_avg = student_attn_map[:,0].mean()\n", + " st_sep_avg_sum[i] += st_sep_avg\n", + " st_cls_avg_sum[i] += st_cls_avg\n", + " \n", + " # Ground Truth\n", + " tc_sep_avg = teacher_attn_map[:,-1].mean()\n", + " tc_cls_avg = teacher_attn_map[:,0].mean()\n", + " tc_sep_avg_sum[i] += tc_sep_avg\n", + " tc_cls_avg_sum[i] += tc_cls_avg\n", + "\n", + " # Coverage Test\n", + " coverage_head_sum = 0\n", + " coverage_teacher_head_sum = 0\n", + " for k in range(student_attn_map.shape[0]):\n", + " st_argsort = student_attn_map[k].sort(descending=True)[1]\n", + " tc_argsort = teacher_attn_map[k].sort(descending=True)[1][:args.tc_top_k] # Top-5\n", + " \n", + " max_idx = 0\n", + " for idx in tc_argsort: # Teacher Top-5 \n", + " tmp = torch.where(st_argsort == idx)\n", + " max_idx = max(tmp[0].item(), max_idx)\n", + " \n", + " coverage_ratio = max_idx / student_attn_map.shape[0]\n", + " coverage_teacher_ratio = (args.tc_top_k - 1) / student_attn_map.shape[0]\n", + " coverage_head_sum += coverage_ratio\n", + " coverage_teacher_head_sum += coverage_teacher_ratio\n", + " \n", + " coverage_head = coverage_head_sum / student_attn_map.shape[0]\n", + " coverage_teacher_head = coverage_teacher_head_sum / student_attn_map.shape[0]\n", + " \n", + " cover_sum[i] += coverage_head\n", + " cover_teacher_sum[i] += coverage_teacher_head\n", + " \n", + " nb_steps += 1\n", + " \n", + " batch_num = batch_num + 1\n", + 
" \n", + " if args.log_metric:\n", + " nb_steps = nb_steps / 12\n", + " \n", + " for l in range(12):\n", + " run[f\"attn/L{l}_KLdiv_mean\"].log(value=kl_div_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_st_SepProb_mean\"].log(value=st_sep_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_st_ClsProb_mean\"].log(value=st_cls_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_tc_SepProb_mean\"].log(value=tc_sep_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_tc_ClsProb_mean\"].log(value=tc_cls_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_st_cover_mean\"].log(value=cover_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_tc_cover_mean\"].log(value=cover_teacher_sum[l] / nb_steps, step=global_step)\n", + "\n", + " args.log_map = True \n", + "\n", + "\n", + "def do_eval(model, task_name, eval_dataloader,\n", + " device, output_mode, eval_labels, num_labels, teacher_model=None):\n", + " eval_loss = 0\n", + " nb_eval_steps = 0\n", + " preds = []\n", + "\n", + " for batch_ in tqdm(eval_dataloader, desc=\"Inference\"):\n", + " batch_ = tuple(t.to(device) for t in batch_)\n", + " \n", + " with torch.no_grad():\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch_\n", + "\n", + " # teacher attnmap test\n", + " if teacher_model is not None:\n", + " logits, teacher_atts, _, teacher_probs, _ = teacher_model(input_ids, segment_ids, input_mask)\n", + " # teacher_probs = 0\n", + " logits, _, _, _, _ = model(input_ids, segment_ids, input_mask, teacher_probs=teacher_probs)\n", + " else:\n", + " logits, _, _, _, _ = model(input_ids, segment_ids, input_mask)\n", + " \n", + " # create eval loss and other metric required by the task\n", + " if output_mode == \"classification\":\n", + " loss_fct = CrossEntropyLoss()\n", + " tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))\n", + " elif output_mode == \"regression\":\n", + " loss_fct = MSELoss()\n", 
+ " tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))\n", + "\n", + " eval_loss += tmp_eval_loss.mean().item()\n", + " nb_eval_steps += 1\n", + " if len(preds) == 0:\n", + " preds.append(logits.detach().cpu().numpy())\n", + " else:\n", + " preds[0] = np.append(\n", + " preds[0], logits.detach().cpu().numpy(), axis=0)\n", + "\n", + " eval_loss = eval_loss / nb_eval_steps\n", + "\n", + " preds = preds[0]\n", + " if output_mode == \"classification\":\n", + " preds = np.argmax(preds, axis=1)\n", + " elif output_mode == \"regression\":\n", + " preds = np.squeeze(preds)\n", + " result = compute_metrics(task_name, preds, eval_labels.numpy())\n", + " result['eval_loss'] = eval_loss\n", + " return result\n", + "\n", + "def soft_cross_entropy(predicts, targets):\n", + " student_likelihood = torch.nn.functional.log_softmax(predicts, dim=-1)\n", + " targets_prob = torch.nn.functional.softmax(targets, dim=-1)\n", + " return torch.sum((- targets_prob * student_likelihood), dim=-1).mean()\n", + "\n", + "processors = {\n", + " \"cola\": ColaProcessor,\n", + " \"mnli\": MnliProcessor,\n", + " \"mnli-mm\": MnliMismatchedProcessor,\n", + " \"mrpc\": MrpcProcessor,\n", + " \"sst-2\": Sst2Processor,\n", + " \"sts-b\": StsbProcessor,\n", + " \"qqp\": QqpProcessor,\n", + " \"qnli\": QnliProcessor,\n", + " \"rte\": RteProcessor \n", + "}\n", + "\n", + "output_modes = {\n", + " \"cola\": \"classification\",\n", + " \"mnli\": \"classification\",\n", + " \"mrpc\": \"classification\",\n", + " \"sst-2\": \"classification\",\n", + " \"sts-b\": \"regression\",\n", + " \"qqp\": \"classification\",\n", + " \"qnli\": \"classification\",\n", + " \"rte\": \"classification\"\n", + "}\n", + "\n", + "default_params = {\n", + " \"cola\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\": 50}, # No Aug : 50 Aug : 400\n", + " \"mnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":8000},\n", + " \"mrpc\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " 
\"sst-2\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\":100},\n", + " \"sts-b\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"qqp\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"qnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"rte\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\": 20}\n", + " }\n", + "\n", + "from bertviz import head_view, model_view\n", + "# from bertviz.transformers_neuron_view import BertModel, BertTokenizer\n", + "from bertviz.neuron_view import show\n", + "import bertviz" + ] + }, + { + "cell_type": "markdown", + "id": "aab9bf6b", + "metadata": {}, + "source": [ + "# GLUE Task Selection" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "75f78270", + "metadata": {}, + "outputs": [], + "source": [ + "task_name = \"sts-b\"\n", + "bert_size = \"large\"" + ] + }, + { + "cell_type": "markdown", + "id": "07b94ac6", + "metadata": {}, + "source": [ + "## Model Dir, Device" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7fec849f", + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "student_model_dir = os.path.join(model_dir,task_name)\n", + "student_model_dir = os.path.join(output_dir, task_name, \"quant\", \"ternary_save\")\n", + "# student_model_dir = os.path.join(output_dir, task_name, \"quant\", \"step_2_da_10\") # DA-A4W2 51.2\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n" + ] + }, + { + "cell_type": "markdown", + "id": "43a99315", + "metadata": {}, + "source": [ + "## Dataset " + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "3fb545e0", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "04/01 06:45:11 PM Writing example 0 of 1500\n", + "04/01 06:45:11 PM *** Example ***\n", + "04/01 06:45:11 PM guid: dev-0\n", + "04/01 06:45:11 PM tokens: [CLS] a man with a hard hat is dancing . [SEP] a man wearing a hard hat is dancing . [SEP]\n", + "04/01 06:45:11 PM input_ids: 101 1037 2158 2007 1037 2524 6045 2003 5613 1012 102 1037 2158 4147 1037 2524 6045 2003 5613 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "04/01 06:45:11 PM input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "04/01 06:45:11 PM segment_ids: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "04/01 06:45:11 PM label: 5.000\n", + "04/01 06:45:11 PM label_id: 5.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_2198397/3831030937.py:189: DeprecationWarning: an integer is required (got type float). 
Implicit conversion to integers using __int__ is deprecated, and may be removed in a future version of Python.\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)\n" + ] + } + ], + "source": [ + "# Processor & Task Info\n", + "processor = processors[task_name]()\n", + "output_mode = output_modes[task_name]\n", + "label_list = processor.get_labels()\n", + "num_labels = len(label_list)\n", + "\n", + "if task_name in default_params:\n", + " batch_size = default_params[task_name][\"batch_size\"]\n", + " max_seq_length = default_params[task_name][\"max_seq_length\"]\n", + " eval_step = default_params[task_name][\"eval_step\"]\n", + " \n", + "# Tokenizer\n", + "tokenizer = BertTokenizer.from_pretrained(teacher_model_dir, do_lower_case=True)\n", + "\n", + "\n", + "# Load Dataset\n", + "data_dir = os.path.join(\"data\",task_name)\n", + "processed_data_dir = os.path.join(data_dir,'preprocessed')\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)\n", + "eval_features = convert_examples_to_features(eval_examples, label_list, max_seq_length, tokenizer, output_mode)\n", + "# dev_file = train_file = os.path.join(processed_data_dir,'dev.pkl') \n", + "# eval_features = pickle.load(open(dev_file,'rb'))\n", + "\n", + "eval_data, eval_labels = get_tensor_data(\"classification\", eval_features)\n", + "eval_sampler = SequentialSampler(eval_data)\n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=32)\n", + "eval_data, eval_labels = get_tensor_data(output_mode, eval_features)\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "3f3b63b5", + "metadata": {}, + "source": [ + "# Model Build" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "0c3a8929", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "04/01 06:44:58 PM Loading model models/BERT_large/sts-b/pytorch_model.bin\n", + 
"04/01 06:44:59 PM loading model...\n", + "04/01 06:44:59 PM done!\n", + "04/01 06:44:59 PM loading configuration file output/BERT_large/sts-b/quant/ternary_save/config.json\n", + "04/01 06:45:05 PM Loading model output/BERT_large/sts-b/quant/ternary_save/pytorch_model.bin\n", + "04/01 06:45:06 PM loading model...\n", + "04/01 06:45:06 PM done!\n", + "\n" + ] + } + ], + "source": [ + "build_tc = 1\n", + "build_st = 1\n", + "\n", + "if build_tc:\n", + " # Teacher Model Build\n", + " teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + " teacher_model.to(device)\n", + " teacher_model.eval()\n", + " model = teacher_model\n", + "\n", + "if build_st:\n", + " # Student Model Build\n", + " student_config = BertConfig.from_pretrained(student_model_dir,\n", + " quantize_act=True,\n", + " quantize_weight=True,\n", + " weight_bits = 2, # Always Ternary when \"quantize_weight = True\"\n", + " input_bits = 8,\n", + " clip_val = 2.5,\n", + " quantize = True,\n", + " ffn_q_1 = True,\n", + " ffn_q_2 = True,\n", + " qkv_q = True,\n", + " emb_q = True,\n", + " cls_q = True,\n", + " clipping = False,\n", + " layer_num = -1,\n", + " mean_scale = 0.7,\n", + " quantizer = \"ternary\",\n", + " act_quantizer = \"ternary\",\n", + " init_scaling = 1,\n", + " clip_ratio = 1,\n", + " gradient_scaling = False,\n", + " clip_method = \"minmax\",\n", + " teacher_attnmap = False,\n", + " parks = False,\n", + " stop_grad = False,\n", + " qk_FP = False,\n", + " map=False,\n", + " act_method = \"clipping\"\n", + " )\n", + "\n", + " student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config, num_labels=num_labels)\n", + " student_model.to(device)\n", + " model = student_model\n", + " print()\n", + "\n", + " # Quantization Option ACT/WEIGHT\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, (QuantizeLinear, QuantizeAct, ClipLinear)): \n", + " module.act_flag 
= True\n", + " module.weight_flag = True" + ] + }, + { + "cell_type": "markdown", + "id": "cc2361c6", + "metadata": {}, + "source": [ + "## Activation Quantization Clip Value Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a32aa6cf", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# for name, module in student_model.named_modules():\n", + "# if isinstance(module, (QuantizeLinear, QuantizeAct, ClipLinear)): \n", + "# module.act_flag = False\n", + "# module.weight_flag = False\n", + " \n", + "# cv_initialize(student_model, eval_dataloader, torch.Tensor([0.005]), device)\n", + "\n", + "# # for name, module in student_model.named_modules():\n", + "# # if isinstance(module, (QuantizeLinear, QuantizeAct, ClipLinear)): \n", + "# # module.act_flag = True\n", + "# # module.weight_flag = False" + ] + }, + { + "cell_type": "markdown", + "id": "34a0dfd7", + "metadata": {}, + "source": [ + "## Model Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "0c515c68", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Student Model Inferece\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inference: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [00:27<00:00, 1.69it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Student Result : {'pearson': 0.8887880506300738, 'spearmanr': 0.8850681732784779, 'corr': 0.8869281119542758, 'eval_loss': 0.6736781127909397}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "eval_st = 1\n", + "eval_tc = 0\n", + "\n", + "if eval_st:\n", + " print(\"Student Model 
Inferece\")\n", + " student_model.eval()\n", + " student_result = do_eval(student_model, task_name, eval_dataloader, device, output_mode, eval_labels, num_labels, teacher_model=teacher_model)\n", + " print(f\"Student Result : {student_result}\")\n", + "\n", + "if eval_tc:\n", + " print(\"Teacher Model Inferece\")\n", + " teacher_result = do_eval(teacher_model, task_name, eval_dataloader, device, output_mode, eval_labels, num_labels)\n", + " print(f\"Teacher Result : {teacher_result}\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "265abf87", + "metadata": {}, + "source": [ + "## BERTViz Model View" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "0eedf469", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input_ids : tensor([[ 101, 1037, 2402, 2775, 2003, 5559, 1037, 3586, 1012, 102, 1037, 2775,\n", + " 2003, 5559, 1037, 3586, 1012, 102]], device='cuda:0')\n", + "tokens : ['[CLS]', 'a', 'young', 'child', 'is', 'riding', 'a', 'horse', '.', '[SEP]', 'a', 'child', 'is', 'riding', 'a', 'horse', '.', '[SEP]']\n", + "A : a young child is riding a horse . \n", + "B : a child is riding a horse . 
\n", + "tensor([ 9, 17], device='cuda:0')\n" + ] + } + ], + "source": [ + "# Sampling Sentence \n", + "i = 0 \n", + "num = 2\n", + "for step, batch in enumerate(eval_dataloader):\n", + " model.train()\n", + " \n", + " batch = tuple(t.to(device) for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch\n", + " i = i + 1\n", + " if i == num:\n", + " break\n", + "\n", + "seq_length = seq_lengths.item()\n", + "input_ids_sliced = input_ids[:,:seq_length]\n", + "input_id = []\n", + "for i in input_ids_sliced[0]:\n", + " input_id.append(i.item())\n", + "tokens = tokenizer.convert_ids_to_tokens(input_id)\n", + "\n", + "\n", + "\n", + "sample_sentence_a = str()\n", + "sample_sentence_b = str()\n", + "index = 0\n", + "\n", + "for i, word in enumerate(tokens[1:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_a += word\n", + " sample_sentence_a += \" \"\n", + "index = i\n", + "\n", + "for i, word in enumerate(tokens[index+2:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_b += word\n", + " sample_sentence_b += \" \"\n", + "\n", + "sep_index = torch.where(input_ids[0] == 102)[0]\n", + "\n", + "if len(sample_sentence_b) > 1:\n", + " sample_sentence_b_start = segment_ids[0].tolist().index(1)\n", + "\n", + "print(f\"input_ids : {input_ids_sliced}\")\n", + "print(f\"tokens : {tokens}\")\n", + "print(f\"A : {sample_sentence_a}\")\n", + "print(f\"B : {sample_sentence_b}\")\n", + "print(sep_index)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "780d1da2", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from bertviz.transformers_neuron_view import BertModel, BertTokenizer\n", + "from bertviz.neuron_view import show\n", + "\n", + "bertviz_neuron_tc = 0\n", + "bertviz_neuron_st = 0\n", + "bertviz_model_tc = 1\n", + "bertviz_model_st = 1\n", + "\n", + "# Quantization Setting\n", + "if bertviz_neuron_st or bertviz_model_st:\n", + " for name, module in 
student_model.named_modules():\n", + " if isinstance(module, (QuantizeLinear, ClipLinear)): \n", + " module.act_flag = False\n", + " module.weight_flag = False\n", + " if isinstance(module, QuantizeAct): \n", + " module.act_flag = False\n", + " module.weight_flag = False\n", + "\n", + "if bertviz_neuron_tc or bertviz_neuron_st:\n", + " if bertviz_neuron_st:\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, BertSelfAttention): \n", + " module.output_bertviz = True\n", + " if bertviz_neuron_tc:\n", + " for name, module in teacher_model.named_modules():\n", + " if isinstance(module, FP_BertSelfAttention): \n", + " module.output_bertviz = True\n", + "\n", + " model_type = 'bert'\n", + " model_version = 'bert-base-uncased'\n", + " \n", + " tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=True)\n", + " if bertviz_neuron_tc:\n", + " if len(sample_sentence_b) > 1:\n", + " show(teacher_model.cpu(), model_type, tokenizer, sample_sentence_a, sample_sentence_b, display_mode=\"light\")\n", + " else:\n", + " show(teacher_model.cpu(), model_type, tokenizer, sample_sentence_a,display_mode=\"light\")\n", + " if bertviz_neuron_st:\n", + " if len(sample_sentence_b) > 1:\n", + " show(student_model.cpu(), model_type, tokenizer, sample_sentence_a, sample_sentence_b, display_mode=\"light\")\n", + " else:\n", + " show(student_model.cpu(), model_type, tokenizer, sample_sentence_a,display_mode=\"light\")\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else:\n", + " layer_num = 12\n", + " head_num = 12\n", + " \n", + "all_layers = list(range(layer_num))\n", + "layers_to_show = all_layers[18:]\n", + "\n", + "if bertviz_model_tc or bertviz_model_st:\n", + " \n", + " if bertviz_model_tc:\n", + " print(\"teacher_map\")\n", + " for name, module in teacher_model.named_modules():\n", + " if isinstance(module, FP_BertSelfAttention): \n", + " module.output_bertviz = False\n", + " 
teacher_model.eval()\n", + " teacher_model.to(device)\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + " model_view(teacher_probs, tokens, include_layers=layers_to_show, display_mode=\"light\")\n", + " \n", + " if bertviz_model_st:\n", + " print(\"student_map\")\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, BertSelfAttention): \n", + " module.output_bertviz = False\n", + " student_model.eval()\n", + " student_model.to(device)\n", + " student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + " model_view(student_probs, tokens, sample_sentence_b_start,include_layers=layers_to_show, display_mode=\"light\")# , include_layers=[0, 1])\n", + " \n", + " \n" + ] + }, + { + "cell_type": "markdown", + "id": "6a0039fe", + "metadata": {}, + "source": [ + "## Forward Check" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "64cf6575", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "KL DIV CHECK\n", + "2.9676687717437744\n", + "5.659456253051758\n", + "3.098494052886963\n", + "2.3057239055633545\n", + "2.587374210357666\n", + "1.6189770698547363\n", + "1.2007269859313965\n", + "1.662299633026123\n", + "2.1250391006469727\n", + "2.056593418121338\n", + "3.305050849914551\n", + "4.6170477867126465\n", + "4.530083656311035\n", + "4.024961948394775\n", + "4.323973178863525\n", + "3.5094804763793945\n", + "3.292818784713745\n", + "2.7897772789001465\n", + "4.441211700439453\n", + "8.378562927246094\n", + "20.496295928955078\n", + "37.17641830444336\n", + "40.135982513427734\n", + "76.01957702636719\n" + ] + } + ], + "source": [ + "from torch.nn import MSELoss\n", + "mse_func = MSELoss()\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 
16\n", + "else:\n", + " layer_num = 12\n", + " head_num = 12\n", + "\n", + "\n", + "attention_pattern_check = 0\n", + "cover_mean_check = 0\n", + "kl_div_check = 1\n", + "mse_check = 0\n", + "attnmap_mse_check = 0\n", + "\n", + "exclude_sep = 0\n", + "\n", + "for name, module in student_model.named_modules():\n", + " if isinstance(module, BertSelfAttention): \n", + " module.output_bertviz = False\n", + "for name, module in teacher_model.named_modules():\n", + " if isinstance(module, FP_BertSelfAttention): \n", + " module.output_bertviz = False\n", + " \n", + "for name, module in student_model.named_modules():\n", + " if isinstance(module, (QuantizeLinear, ClipLinear, QuantizeAct)): \n", + " module.act_flag = True\n", + " module.weight_flag = True\n", + "\n", + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "\n", + "student_model.eval()\n", + "teacher_model.eval()\n", + "student_model.to(device)\n", + "teacher_model.to(device)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "probs = teacher_probs\n", + "if attention_pattern_check:\n", + " print(\"Attention mean CHECK\")\n", + " for i in range(layer_num):\n", + " if len(sep_index) == 2:\n", + " print((probs[i][0,:,:,sep_index[0]].mean() + probs[i][0,:,:,sep_index[1]].mean()).item())\n", + " else:\n", + " print(probs[i][0,:,:,sep_index[0]].mean().item())\n", + " \n", + "if cover_mean_check:\n", + " print(\"COVER MEAN CHECK\")\n", + " top_k = 5\n", + "\n", + " for i in range(layer_num):\n", + " teacher = teacher_probs[i][0]\n", + " student = student_probs[i][0]\n", + "\n", + " head_sum = 0\n", + " for h in range(head_num):\n", + " coverage_head_sum = 0\n", + " for row in range(seq_length-1):\n", + " if exclude_sep:\n", + " 
tc_argsort = teacher[h][:seq_length-1,:seq_length-1].sort(descending=True)[1][row][:top_k] # top-k\n", + " st_argsort = student[h][:seq_length-1,:seq_length-1].sort(descending=True)[1][row]\n", + " tc_argsort = teacher[h].sort(descending=True)[1][row][:top_k] # top-k\n", + " st_argsort = student[h].sort(descending=True)[1][row]\n", + "\n", + " max_idx = 0\n", + " for idx in tc_argsort:\n", + " tmp = torch.where(st_argsort == idx)\n", + " max_idx = max(tmp[0].item(), max_idx)\n", + "\n", + " coverage_ratio = max_idx / student.shape[1]\n", + " coverage_head_sum += coverage_ratio\n", + "\n", + " # print(f\"H{h} : {coverage_head_sum/seq_length}\")\n", + "\n", + " head_sum += coverage_head_sum / seq_length\n", + " print(head_sum / head_num)\n", + "\n", + "if kl_div_check:\n", + " print(\"KL DIV CHECK\")\n", + " for i in range(layer_num):\n", + " if exclude_sep:\n", + " if len(sep_index) == 2:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000; teacher_atts[i][:,:,:,sep_index[1]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000; student_atts[i][:,:,:,sep_index[1]] = -100000\n", + " else:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000\n", + " \n", + " teacher = torch.nn.Softmax(dim=-1)(teacher_atts[i])\n", + " student = torch.nn.Softmax(dim=-1)(student_atts[i])\n", + " \n", + " student = torch.clamp_min(student, 1e-8)\n", + " teacher = torch.clamp_min(teacher, 1e-8)\n", + " else: \n", + " teacher = teacher_probs[i]\n", + " student = student_probs[i]\n", + " \n", + " neg_cross_entropy = teacher * torch.log(student) \n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " # p(t) log p(t) = negative entropy\n", + " neg_entropy = teacher * torch.log(teacher) \n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) # (b, h, s, s) -> (b, h, 
s)\n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " kld_loss = neg_entropy - neg_cross_entropy\n", + "\n", + " kld_loss_sum = torch.sum(kld_loss)\n", + " print(kld_loss_sum.item())\n", + "\n", + "if mse_check:\n", + " for i in range(layer_num):\n", + " print(mse_func(teacher_atts[i], student_atts[i]).item())\n", + " \n", + "if attnmap_mse_check:\n", + " for i in range(layer_num):\n", + " if exclude_sep:\n", + " if len(sep_index) == 2:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000; teacher_atts[i][:,:,:,sep_index[1]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000; student_atts[i][:,:,:,sep_index[1]] = -100000\n", + " else:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000\n", + " \n", + " teacher = torch.nn.Softmax(dim=-1)(teacher_atts[i])\n", + " student = torch.nn.Softmax(dim=-1)(student_atts[i])\n", + " print(mse_func(teacher, student).item())\n", + " else: \n", + " print(mse_func(teacher_probs[i], student_probs[i]).item())\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "e1cfe75a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.155358076095581\n" + ] + } + ], + "source": [ + "\n", + "i = 9\n", + "teacher = teacher_probs[1][:,i,:,:]\n", + "student = student_probs[1][:,i,:,:]\n", + "\n", + "\n", + "\n", + "neg_cross_entropy = teacher * torch.log(student) \n", + "neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + "neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + "# p(t) log p(t) = negative entropy\n", + "neg_entropy = teacher * torch.log(teacher) \n", + "neg_entropy = torch.sum(neg_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + "neg_entropy = torch.sum(neg_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, 
h)\n", + "\n", + "kld_loss = neg_entropy - neg_cross_entropy\n", + "\n", + "kld_loss_sum = torch.sum(kld_loss)\n", + "print(kld_loss_sum.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "81a859c7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.4594290256500244\n", + "0.5127482414245605\n", + "0.5085060596466064\n", + "0.44210290908813477\n", + "0.3044252395629883\n", + "0.4317352771759033\n", + "0.43944454193115234\n", + "0.34624576568603516\n", + "0.45526742935180664\n", + "0.4619622230529785\n", + "0.2999594211578369\n", + "0.5414872169494629\n", + "0.4033381938934326\n", + "0.3882777690887451\n", + "0.3862929344177246\n", + "0.29909467697143555\n" + ] + } + ], + "source": [ + "\n", + "head = 7\n", + "for head in range(16):\n", + " teacher = teacher_probs[23][:,head,:,:]\n", + " student = student_probs[23][:,head,:,:]\n", + " neg_cross_entropy = teacher * torch.log(student) \n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " # p(t) log p(t) = negative entropy\n", + " neg_entropy = teacher * torch.log(teacher) \n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " kld_loss = neg_entropy - neg_cross_entropy\n", + "\n", + " kld_loss_sum = torch.sum(kld_loss)\n", + " print(kld_loss_sum.item())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "dab8d045", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[[2.1772e-02, 1.2781e-02, 4.9408e-03, ..., 2.0158e-02,\n", + " 6.9626e-02, 1.4999e-02],\n", + " [1.0919e-03, 2.1085e-03, 1.0577e-03, ..., 1.6647e-04,\n", + " 4.3606e-04, 1.8829e-01],\n", + " [2.0435e-04, 2.2089e-04, 1.0611e-04, 
..., 4.0684e-05,\n", + " 4.6589e-05, 1.9174e-01],\n", + " ...,\n", + " [1.2242e-03, 9.6587e-05, 5.1341e-05, ..., 2.4791e-03,\n", + " 8.4426e-04, 2.3197e-01],\n", + " [3.1989e-05, 1.6445e-05, 4.2517e-06, ..., 5.4571e-05,\n", + " 1.5931e-05, 1.9737e-01],\n", + " [2.7168e-02, 1.6892e-02, 1.5799e-02, ..., 1.5508e-02,\n", + " 2.3137e-02, 4.5687e-02]]], device='cuda:0', grad_fn=)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_probs[23][:,7,:,:]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "857ecb5c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[0.8724, 0.4891, 0.6094, 0.6159, 0.4543, 0.4242, 0.5964, 0.1814, 0.7470,\n", + " 0.3960, 0.4766, 0.3705, 0.6179, 0.2615, 0.6612, 0.4897, 0.6067, 0.3899,\n", + " 0.4389, 0.7189, 0.3123, 0.2026, 0.3890, 0.2045, 0.5183, 0.4821, 0.4648,\n", + " 0.2569, 0.2587, 0.4544, 0.2224, 0.6254, 0.7420, 0.1829, 0.5220, 0.2288,\n", + " 0.6594, 0.6613, 0.4734, 0.7735, 0.7583, 0.2122, 0.6728, 0.2626, 0.7567,\n", + " 0.2797, 0.7548, 0.4164, 0.7275]], device='cuda:0',\n", + " grad_fn=)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "teacher_probs[1][:,9,:,0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2440e7dd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/BERTviz.ipynb b/notebooks/BERTviz.ipynb new file mode 100644 index 0000000..5ba373c --- /dev/null +++ 
b/notebooks/BERTviz.ipynb @@ -0,0 +1,2724 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "9f99d9c2", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import absolute_import, division, print_function\n", + "\n", + "import pprint\n", + "import argparse\n", + "import logging\n", + "import os\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\" # Set GPU Index to use\n", + "os.environ['CUDA_LAUNCH_BLOCKING'] = \"1\"\n", + "import random\n", + "import sys\n", + "import pickle\n", + "import copy\n", + "import collections\n", + "import math\n", + "\n", + "import numpy as np\n", + "import numpy\n", + "import torch\n", + "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler,TensorDataset\n", + "# from torch.utils.tensorboard import SummaryWriter\n", + "\n", + "from torch.nn import CrossEntropyLoss, MSELoss\n", + "from tqdm import tqdm\n", + "from transformer import BertForSequenceClassification,WEIGHTS_NAME, CONFIG_NAME\n", + "from transformer.modeling_quant import BertForSequenceClassification as QuantBertForSequenceClassification\n", + "from transformer import BertTokenizer\n", + "from transformer import BertAdam\n", + "from transformer import BertConfig\n", + "from transformer import QuantizeLinear, QuantizeAct, BertSelfAttention, FP_BertSelfAttention, ClipLinear\n", + "from utils_glue import *\n", + "from bertviz import model_view\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import torch.nn.functional as F\n", + "\n", + "class AverageMeter(object):\n", + " \"\"\"Computes and stores the average and current value\"\"\"\n", + " def __init__(self):\n", + " self.reset()\n", + "\n", + " def reset(self):\n", + " self.val = 0\n", + " self.avg = 0 \n", + " self.sum = 0\n", + " self.count = 0\n", + "\n", + " def update(self, val, n=1):\n", + " self.val = val\n", + " self.sum += val * n\n", + " self.count += n\n", + " self.avg = self.sum / 
self.count\n", + "\n", + "def cv_initialize(model, loader, ratio, device):\n", + " \n", + " def initialize_hook(module, input, output):\n", + " if isinstance(module, (QuantizeLinear, QuantizeAct, ClipLinear)):\n", + " \"\"\"KDLSQ-BERT ACT Quant init Method\n", + " Ref: https://arxiv.org/abs/2101.05938\n", + " \"\"\"\n", + " if not isinstance(input, torch.Tensor):\n", + " input = input[0]\n", + " \n", + " n = torch.numel(input)\n", + " input_sorted, index = torch.sort(input.reshape(-1), descending=False)\n", + " \n", + " index_min = torch.round(ratio * n / 2)\n", + " index_max = n - index_min\n", + " \n", + " s_init = (input_sorted[int(index_min)].to(device), input_sorted[int(index_max)].to(device))\n", + " \n", + " # MATPLOT\n", + " \n", + " fig, [ax1, ax2, ax3] = plt.subplots(1,3, figsize=(16, 4)) \n", + " \n", + " sns.histplot(data=input.reshape(-1).detach().cpu().numpy(), kde = True, bins=100, ax=ax1)\n", + " sns.rugplot(data=input.reshape(-1).detach().cpu().numpy(), ax=ax1)\n", + " sns.histplot(data=module.weight.reshape(-1).detach().cpu().numpy(), kde = True, bins=100, ax=ax2)\n", + " sns.rugplot(data=module.weight.reshape(-1).detach().cpu().numpy(), ax=ax2)\n", + " sns.histplot(data=output.reshape(-1).detach().cpu().numpy(), kde = True, bins=100, ax=ax3)\n", + " sns.rugplot(data=output.reshape(-1).detach().cpu().numpy(), ax=ax3)\n", + " # fig, [ax1, ax2] = plt.subplots(1,2, figsize=(12, 4)) \n", + " \n", + " # sns.distplot(input.reshape(-1).detach().cpu().numpy() , hist = True, rug = True, kde = True, bins=100, norm_hist=False, kde_kws=dict(linewidth=0.5), rug_kws=dict(linewidth=0.5), ax=ax1)\n", + " # sns.distplot(output.reshape(-1).detach().cpu().numpy() , hist = True, rug = True, kde = True, bins=100, norm_hist=False, kde_kws=dict(linewidth=0.5), rug_kws=dict(linewidth=0.5), ax=ax2)\n", + " # # plt.axvline(x=s_init[0].detach().cpu().numpy(), color='r', linestyle='--')\n", + " # # plt.axvline(x=s_init[1].detach().cpu().numpy(), color='r', 
linestyle='--')\n", + "\n", + " ax1.set_xlabel(\"Input Activation\")\n", + " # ax2.set_xlabel(\"Output Activation\")\n", + " ax2.set_xlabel(\"Module Weight\")\n", + " ax3.set_xlabel(\"Output Activation\")\n", + " \n", + " ax1.set_ylabel(\"Density\")\n", + " ax2.set_ylabel(\"Density\")\n", + " ax3.set_ylabel(\"Density\")\n", + "\n", + " ax1.set_title(f\"{module.name} Input ACT histogram\")\n", + " # ax2.set_title(f\"{module.name} Output ACT histogram\")\n", + " ax2.set_title(f\"{module.name} Weight histogram\")\n", + " ax3.set_title(f\"{module.name} Output ACT histogram\")\n", + " # plt.savefig(f\"plt_storage/hook_inputs/sst-2-fp/{module.name}.png\")\n", + " plt.show()\n", + " plt.close(fig)\n", + " # module.clip_initialize(s_init)\n", + " # logger.info(f\"{module} : min {s_init[0].item()} max {s_init[1].item()}\") \n", + "\n", + " \n", + " hooks = []\n", + "\n", + " for name, module in model.named_modules():\n", + " hook = module.register_forward_hook(initialize_hook)\n", + " hooks.append(hook)\n", + " \n", + " model.train()\n", + " model.to(device)\n", + " \n", + " for step, batch in enumerate(loader):\n", + " batch = tuple(t.to(\"cuda\") for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch \n", + " with torch.no_grad():\n", + " student_logits, student_atts, student_reps, student_probs, student_values = model(input_ids, segment_ids, input_mask, teacher_probs=None)\n", + " break\n", + " \n", + " for hook in hooks:\n", + " hook.remove()\n", + "\n", + "def str2bool(v):\n", + " if isinstance(v, bool):\n", + " return v\n", + " if v.lower() in ('yes', 'true', 't', 'y', '1'):\n", + " return True\n", + " elif v.lower() in ('no', 'false', 'f', 'n', '0'):\n", + " return False\n", + " else:\n", + " raise argparse.ArgumentTypeError('Boolean value expected.')\n", + "\n", + "def load_vocab(vocab_file):\n", + " \"\"\"Loads a vocabulary file into a dictionary.\"\"\"\n", + " vocab = collections.OrderedDict()\n", + " index = 0\n", + " with 
open(vocab_file, \"r\", encoding=\"utf-8\") as reader:\n", + " while True:\n", + " token = reader.readline()\n", + " if not token:\n", + " break\n", + " token = token.strip()\n", + " #vocab[token] = index\n", + " vocab[index] = token\n", + " index += 1\n", + " return vocab\n", + "\n", + "def attention_pattern(model, loader, device):\n", + " \n", + " def initialize_hook(module, input, output):\n", + " if isinstance(module, BertSelfAttention):\n", + " \n", + " attn_mask = input[1]\n", + " attention_output = output[-2]\n", + " \n", + " seq_length = (attn_mask == 0).sum()\n", + " \n", + " print(attention_output[0,:,:seq_length,seq_length-1].mean().item())\n", + " \n", + "\n", + " hooks = []\n", + "\n", + " for name, module in model.named_modules():\n", + " hook = module.register_forward_hook(initialize_hook)\n", + " hooks.append(hook)\n", + " \n", + " model.eval()\n", + " model.to(device)\n", + " \n", + " for step, batch in enumerate(loader):\n", + " batch = tuple(t.to(\"cuda\") for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch \n", + " with torch.no_grad():\n", + " student_logits, student_atts, student_reps, student_probs, student_values = model(input_ids, segment_ids, input_mask)\n", + " break\n", + " \n", + " for hook in hooks:\n", + " hook.remove()\n", + " \n", + "def get_tensor_data(output_mode, features):\n", + " if output_mode == \"classification\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)\n", + " elif output_mode == \"regression\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)\n", + "\n", + "\n", + " all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long)\n", + " all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)\n", + " all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)\n", + " all_segment_ids = torch.tensor([f.segment_ids for f in features], 
dtype=torch.long)\n", + " tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,all_label_ids, all_seq_lengths)\n", + " return tensor_data, all_label_ids\n", + "\n", + "def do_logging(run, student_model, teacher_model, test_dataloader, device, global_step, args, vocab):\n", + " \n", + " if args.bert == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + " else:\n", + " layer_num = 12\n", + " head_num = 12\n", + " \n", + " nb_steps = 0\n", + " \n", + " kl_div_sum = [0 for i in range(layer_num)]\n", + " st_sep_avg_sum = [0 for i in range(layer_num)]; st_cls_avg_sum = [0 for i in range(layer_num)]; tc_sep_avg_sum = [0 for i in range(layer_num)]; tc_cls_avg_sum = [0 for i in range(layer_num)]\n", + " cover_sum = [0 for i in range(layer_num)]\n", + " cover_teacher_sum = [0 for i in range(layer_num)]\n", + " \n", + " batch_num = 0\n", + " \n", + " for batch_ in tqdm(test_dataloader, desc=\"Logging Test\", mininterval=0.01, ascii=True, leave=False):\n", + " batch_ = tuple(t.to(device) for t in batch_)\n", + " \n", + " if batch_num >= 1: # Visualize Attention Map only First Batch \n", + " args.log_map = False\n", + " \n", + " with torch.no_grad():\n", + " input_ids, input_mask, segment_ids, label_id, seq_length = batch_\n", + "\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids, segment_ids, input_mask)\n", + " student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids, segment_ids, input_mask, teacher_probs=teacher_probs)\n", + " \n", + " # Layer\n", + " for i, (student_prob, teacher_prob) in enumerate(zip(student_probs, teacher_probs)): \n", + "\n", + " # Head\n", + " for head in range(head_num):\n", + " \n", + " if args.log_map:\n", + " \n", + " word_list = []\n", + " \n", + " for word in range(seq_length):\n", + " word_list.append(vocab[input_ids[0][word].item()])\n", + " \n", + " student_prob_map = 
student_prob[0][head][:seq_length,:seq_length].clone().detach().cpu().numpy()\n", + " teacher_prob_map = teacher_prob[0][head][:seq_length,:seq_length].clone().detach().cpu().numpy()\n", + " \n", + " fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(16,8))\n", + " ax1.set_title(f\"{i}th Layer {head}th Head Teacher\")\n", + " heatmap = ax1.pcolor(teacher_prob_map, cmap=plt.cm.Blues)\n", + " \n", + " ax1.set_xticks(numpy.arange(teacher_prob_map.shape[1]) + 0.5, minor=False)\n", + " ax1.set_yticks(numpy.arange(teacher_prob_map.shape[0]) + 0.5, minor=False)\n", + " \n", + " ax1.set_xlim(0, int(teacher_prob_map.shape[1]))\n", + " ax1.set_ylim(0, int(teacher_prob_map.shape[0]))\n", + "\n", + " ax1.invert_yaxis()\n", + " ax1.xaxis.tick_top()\n", + "\n", + " ax1.set_xticklabels(word_list, minor=False)\n", + " ax1.set_yticklabels(word_list, minor=False)\n", + "\n", + " plt.xticks(rotation=45)\n", + " \n", + " ax2.set_title(f\"{i}th Layer {head}th Head Student\")\n", + " heatmap = ax2.pcolor(student_prob_map, cmap=plt.cm.Blues)\n", + "\n", + " ax2.set_xticks(numpy.arange(student_prob_map.shape[1]) + 0.5, minor=False)\n", + " ax2.set_yticks(numpy.arange(student_prob_map.shape[0]) + 0.5, minor=False)\n", + "\n", + " ax2.set_xlim(0, int(student_prob_map.shape[1]))\n", + " ax2.set_ylim(0, int(student_prob_map.shape[0]))\n", + "\n", + " ax2.invert_yaxis()\n", + " ax2.xaxis.tick_top()\n", + "\n", + " ax2.set_xticklabels(word_list, minor=False)\n", + " ax2.set_yticklabels(word_list, minor=False)\n", + "\n", + " plt.xticks(rotation=45)\n", + " \n", + " plt_folder_name = os.path.join(\"plt_storage\" + \"/\" + args.exp_name)\n", + " if not os.path.exists(plt_folder_name):\n", + " os.mkdir(plt_folder_name) \n", + " plt_folder_name = os.path.join(plt_folder_name, f\"step_{global_step}\")\n", + " if not os.path.exists(plt_folder_name):\n", + " os.mkdir(plt_folder_name) \n", + " plt.savefig(plt_folder_name + \"/\" + f\"L{i}_H{head}.png\")\n", + " plt.close()\n", + " \n", + "\n", + " if 
args.log_metric:\n", + " \n", + " student_prob = student_prob\n", + " teacher_prob = teacher_prob\n", + "\n", + " # Attention Map\n", + " student_attn_map = student_prob[0][head][:seq_length,:seq_length].clone().detach()\n", + " teacher_attn_map = teacher_prob[0][head][:seq_length,:seq_length].clone().detach()\n", + "\n", + " # KL Divergence\n", + " kl_div = F.kl_div(student_attn_map.log(), teacher_attn_map, reduction='batchmean')\n", + " kl_div_sum[i] += kl_div\n", + "\n", + " # Special Token Prob Mean\n", + " st_sep_avg = student_attn_map[:,-1].mean()\n", + " st_cls_avg = student_attn_map[:,0].mean()\n", + " st_sep_avg_sum[i] += st_sep_avg\n", + " st_cls_avg_sum[i] += st_cls_avg\n", + " \n", + " # Ground Truth\n", + " tc_sep_avg = teacher_attn_map[:,-1].mean()\n", + " tc_cls_avg = teacher_attn_map[:,0].mean()\n", + " tc_sep_avg_sum[i] += tc_sep_avg\n", + " tc_cls_avg_sum[i] += tc_cls_avg\n", + "\n", + " # Coverage Test\n", + " coverage_head_sum = 0\n", + " coverage_teacher_head_sum = 0\n", + " for k in range(student_attn_map.shape[0]):\n", + " st_argsort = student_attn_map[k].sort(descending=True)[1]\n", + " tc_argsort = teacher_attn_map[k].sort(descending=True)[1][:args.tc_top_k] # Top-5\n", + " \n", + " max_idx = 0\n", + " for idx in tc_argsort: # Teacher Top-5 \n", + " tmp = torch.where(st_argsort == idx)\n", + " max_idx = max(tmp[0].item(), max_idx)\n", + " \n", + " coverage_ratio = max_idx / student_attn_map.shape[0]\n", + " coverage_teacher_ratio = (args.tc_top_k - 1) / student_attn_map.shape[0]\n", + " coverage_head_sum += coverage_ratio\n", + " coverage_teacher_head_sum += coverage_teacher_ratio\n", + " \n", + " coverage_head = coverage_head_sum / student_attn_map.shape[0]\n", + " coverage_teacher_head = coverage_teacher_head_sum / student_attn_map.shape[0]\n", + " \n", + " cover_sum[i] += coverage_head\n", + " cover_teacher_sum[i] += coverage_teacher_head\n", + " \n", + " nb_steps += 1\n", + " \n", + " batch_num = batch_num + 1\n", + " \n", + " if 
args.log_metric:\n", + " nb_steps = nb_steps / 12\n", + " \n", + " for l in range(12):\n", + " run[f\"attn/L{l}_KLdiv_mean\"].log(value=kl_div_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_st_SepProb_mean\"].log(value=st_sep_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_st_ClsProb_mean\"].log(value=st_cls_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_tc_SepProb_mean\"].log(value=tc_sep_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_tc_ClsProb_mean\"].log(value=tc_cls_avg_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_st_cover_mean\"].log(value=cover_sum[l] / nb_steps, step=global_step)\n", + " run[f\"attn/L{l}_tc_cover_mean\"].log(value=cover_teacher_sum[l] / nb_steps, step=global_step)\n", + "\n", + " args.log_map = True \n", + "\n", + "\n", + "def do_eval(model, task_name, eval_dataloader,\n", + " device, output_mode, eval_labels, num_labels, teacher_model=None):\n", + " eval_loss = 0\n", + " nb_eval_steps = 0\n", + " preds = []\n", + "\n", + " for batch_ in tqdm(eval_dataloader, desc=\"Inference\"):\n", + " batch_ = tuple(t.to(device) for t in batch_)\n", + " \n", + " with torch.no_grad():\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch_\n", + "\n", + " # teacher attnmap test\n", + " if teacher_model is not None:\n", + " logits, teacher_atts, _, teacher_probs, _ = teacher_model(input_ids, segment_ids, input_mask)\n", + " # teacher_probs = 0\n", + " logits, _, _, _, _ = model(input_ids, segment_ids, input_mask, teacher_outputs=None)\n", + " else:\n", + " logits, _, _, _, _ = model(input_ids, segment_ids, input_mask)\n", + " \n", + " # create eval loss and other metric required by the task\n", + " if output_mode == \"classification\":\n", + " loss_fct = CrossEntropyLoss()\n", + " tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))\n", + " elif output_mode == \"regression\":\n", + " loss_fct = MSELoss()\n", + " tmp_eval_loss = 
loss_fct(logits.view(-1), label_ids.view(-1))\n", + "\n", + " eval_loss += tmp_eval_loss.mean().item()\n", + " nb_eval_steps += 1\n", + " if len(preds) == 0:\n", + " preds.append(logits.detach().cpu().numpy())\n", + " else:\n", + " preds[0] = np.append(\n", + " preds[0], logits.detach().cpu().numpy(), axis=0)\n", + "\n", + " eval_loss = eval_loss / nb_eval_steps\n", + "\n", + " preds = preds[0]\n", + " if output_mode == \"classification\":\n", + " preds = np.argmax(preds, axis=1)\n", + " elif output_mode == \"regression\":\n", + " preds = np.squeeze(preds)\n", + " result = compute_metrics(task_name, preds, eval_labels.numpy())\n", + " result['eval_loss'] = eval_loss\n", + " return result\n", + "\n", + "def soft_cross_entropy(predicts, targets):\n", + " student_likelihood = torch.nn.functional.log_softmax(predicts, dim=-1)\n", + " targets_prob = torch.nn.functional.softmax(targets, dim=-1)\n", + " return torch.sum((- targets_prob * student_likelihood), dim=-1).mean()\n", + "\n", + "processors = {\n", + " \"cola\": ColaProcessor,\n", + " \"mnli\": MnliProcessor,\n", + " \"mnli-mm\": MnliMismatchedProcessor,\n", + " \"mrpc\": MrpcProcessor,\n", + " \"sst-2\": Sst2Processor,\n", + " \"sts-b\": StsbProcessor,\n", + " \"qqp\": QqpProcessor,\n", + " \"qnli\": QnliProcessor,\n", + " \"rte\": RteProcessor \n", + "}\n", + "\n", + "output_modes = {\n", + " \"cola\": \"classification\",\n", + " \"mnli\": \"classification\",\n", + " \"mrpc\": \"classification\",\n", + " \"sst-2\": \"classification\",\n", + " \"sts-b\": \"regression\",\n", + " \"qqp\": \"classification\",\n", + " \"qnli\": \"classification\",\n", + " \"rte\": \"classification\"\n", + "}\n", + "\n", + "default_params = {\n", + " \"cola\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\": 50}, # No Aug : 50 Aug : 400\n", + " \"mnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":8000},\n", + " \"mrpc\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"sst-2\": 
{\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\":100},\n", + " \"sts-b\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"qqp\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"qnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"rte\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\": 20}\n", + " }\n", + "\n", + "from bertviz import head_view, model_view\n", + "# from bertviz.transformers_neuron_view import BertModel, BertTokenizer\n", + "from bertviz.neuron_view import show\n", + "import bertviz" + ] + }, + { + "cell_type": "markdown", + "id": "aab9bf6b", + "metadata": {}, + "source": [ + "# GLUE Task Selection" + ] + }, + { + "cell_type": "code", + "execution_count": 560, + "id": "75f78270", + "metadata": {}, + "outputs": [], + "source": [ + "task_name = \"rte\"\n", + "bert_size = \"base\"\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else:\n", + " layer_num = 12\n", + " head_num = 12" + ] + }, + { + "cell_type": "markdown", + "id": "07b94ac6", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Model Dir, Device & Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 561, + "id": "7fec849f", + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "student_model_dir = os.path.join(model_dir,task_name)\n", + "# student_model_dir = os.path.join(output_dir, task_name, \"quant\", \"ternary_save\") # DA-A4W2 51.2\n", + "student_model_dir_1 = os.path.join(output_dir, task_name, \"exploration\", \"1SB_S\")\n", + "student_model_dir_2 = os.path.join(output_dir, task_name, \"exploration\", \"1SB_S_M\")\n", + 
"student_model_dir_3 = os.path.join(output_dir, task_name, \"exploration\", \"step_2_S_M\")\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 562, + "id": "3fb545e0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "08/02 01:12:08 PM Writing example 0 of 277\n", + "08/02 01:12:08 PM *** Example ***\n", + "08/02 01:12:08 PM guid: dev-0\n", + "08/02 01:12:08 PM tokens: [CLS] dana reeve , the widow of the actor christopher reeve , has died of lung cancer at age 44 , according to the christopher reeve foundation . [SEP] christopher reeve had an accident . [SEP]\n", + "08/02 01:12:08 PM input_ids: 101 11271 20726 1010 1996 7794 1997 1996 3364 5696 20726 1010 2038 2351 1997 11192 4456 2012 2287 4008 1010 2429 2000 1996 5696 20726 3192 1012 102 5696 20726 2018 2019 4926 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "08/02 01:12:08 PM input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "08/02 01:12:09 PM segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + "08/02 01:12:09 PM label: not_entailment\n", + "08/02 01:12:09 PM label_id: 1\n" + ] + } + ], + "source": [ + "# Processor & Task Info\n", + "processor = processors[task_name]()\n", + "output_mode = output_modes[task_name]\n", + "label_list = processor.get_labels()\n", + "num_labels = len(label_list)\n", + "\n", + 
"if task_name in default_params:\n", + " batch_size = default_params[task_name][\"batch_size\"]\n", + " max_seq_length = default_params[task_name][\"max_seq_length\"]\n", + " eval_step = default_params[task_name][\"eval_step\"]\n", + " \n", + "# Tokenizer\n", + "tokenizer = BertTokenizer.from_pretrained(teacher_model_dir, do_lower_case=True)\n", + "\n", + "\n", + "# Load Dataset\n", + "data_dir = os.path.join(\"data\",task_name)\n", + "processed_data_dir = os.path.join(data_dir,'preprocessed')\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)\n", + "eval_features = convert_examples_to_features(eval_examples, label_list, max_seq_length, tokenizer, output_mode)\n", + "# dev_file = train_file = os.path.join(processed_data_dir,'dev.pkl') \n", + "# eval_features = pickle.load(open(dev_file,'rb'))\n", + "\n", + "eval_data, eval_labels = get_tensor_data(\"classification\", eval_features)\n", + "eval_sampler = SequentialSampler(eval_data)\n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=1)\n", + "eval_data, eval_labels = get_tensor_data(output_mode, eval_features)\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)" + ] + }, + { + "cell_type": "markdown", + "id": "3f3b63b5", + "metadata": {}, + "source": [ + "# Model Build" + ] + }, + { + "cell_type": "code", + "execution_count": 563, + "id": "0c3a8929", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "08/02 01:12:11 PM Loading model models/rte/pytorch_model.bin\n", + "08/02 01:12:11 PM loading model...\n", + "08/02 01:12:11 PM done!\n", + "08/02 01:12:11 PM loading configuration file output/rte/exploration/1SB_S/config.json\n", + "08/02 01:12:12 PM Loading model output/rte/exploration/1SB_S/pytorch_model.bin\n", + "08/02 01:12:13 PM loading model...\n", + "08/02 01:12:13 PM done!\n", + "08/02 01:12:13 PM loading configuration file output/rte/exploration/1SB_S_M/config.json\n", + "08/02 01:12:14 PM Loading 
model output/rte/exploration/1SB_S_M/pytorch_model.bin\n", + "08/02 01:12:15 PM loading model...\n", + "08/02 01:12:15 PM done!\n", + "08/02 01:12:15 PM loading configuration file output/rte/exploration/step_2_S_M/config.json\n", + "08/02 01:12:16 PM Loading model output/rte/exploration/step_2_S_M/pytorch_model.bin\n", + "08/02 01:12:17 PM loading model...\n", + "08/02 01:12:17 PM done!\n", + "08/02 01:12:17 PM loading configuration file output/rte/exploration/1SB_S/config.json\n", + "08/02 01:12:18 PM Loading model models/rte/pytorch_model.bin\n", + "08/02 01:12:19 PM loading model...\n", + "08/02 01:12:19 PM done!\n", + "\n" + ] + } + ], + "source": [ + "device = \"cpu\"# torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "\n", + "# Teacher Model Build\n", + "teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + "teacher_model.to(device)\n", + "teacher_model.eval()\n", + "\n", + "# Student Model Build\n", + "student_config = BertConfig.from_pretrained(student_model_dir_1)\n", + "student_model_1 = QuantBertForSequenceClassification.from_pretrained(student_model_dir_1, config = student_config, num_labels=num_labels)\n", + "student_model_1.to(device)\n", + "\n", + "student_config = BertConfig.from_pretrained(student_model_dir_2)\n", + "student_model_2 = QuantBertForSequenceClassification.from_pretrained(student_model_dir_2, config = student_config, num_labels=num_labels)\n", + "student_model_2.to(device)\n", + "\n", + "student_config = BertConfig.from_pretrained(student_model_dir_3)\n", + "student_model_3 = QuantBertForSequenceClassification.from_pretrained(student_model_dir_3, config = student_config, num_labels=num_labels)\n", + "student_model_3.to(device)\n", + "\n", + "# Q Model Build\n", + "student_config = BertConfig.from_pretrained(student_model_dir_1)\n", + "q_model = QuantBertForSequenceClassification.from_pretrained(teacher_model_dir, config = student_config, 
num_labels=num_labels)\n", + "q_model.to(device)\n", + "\n", + "print()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 564, + "id": "a6bddc04", + "metadata": {}, + "outputs": [], + "source": [ + "batch = next(iter(eval_dataloader))\n", + "input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch\n", + "seq_length = seq_lengths.item()\n", + "\n", + "input_ids_sliced = input_ids[:,:seq_length]\n", + "seq_length = len(input_ids_sliced[0])\n", + "\n", + "input_id = []\n", + "for i in input_ids_sliced[0]:\n", + " input_id.append(i.item())\n", + "tokens = tokenizer.convert_ids_to_tokens(input_id)\n", + "\n", + "with torch.no_grad():\n", + " _, _, _, teacher_probs, teacher_values = teacher_model(input_ids_sliced)\n", + " _, _, _, student_probs_1, student_values = student_model_1(input_ids_sliced,teacher_outputs=None)\n", + " _, _, _, student_probs_2, student_values = student_model_2(input_ids_sliced,teacher_outputs=None)\n", + " _, _, _, student_probs_3, student_values = student_model_3(input_ids_sliced,teacher_outputs=None)\n", + " q_logits, q_atts, q_reps, q_probs, q_values = q_model(input_ids_sliced, teacher_outputs=None)\n", + " \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 565, + "id": "d3c080d0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "36" + ] + }, + "execution_count": 565, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(input_ids_sliced[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 566, + "id": "9728c493", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "36" + ] + }, + "execution_count": 566, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 567, + "id": "fbd0c63d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([12, 12])" + ] + }, + "execution_count": 567, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "prob_1.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3dbc93cf", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "fs = 20\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " fig, [ax1, ax2, ax3, ax4] = plt.subplots(1, 4, figsize=(22,5), dpi=150)\n", + " \n", + " plt.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=0.9, wspace=0.12, hspace=0.2)\n", + " tc_prob = teacher_probs[l][0,h,:,:]\n", + " q_prob = q_probs[l][0,h,:,:]\n", + " prob_1 = student_probs_1[l][0,h,:,:]\n", + " prob_3 = student_probs_3[l][0,h,:,:]\n", + " \n", + " heatmap = ax1.pcolor(tc_prob, cmap=plt.cm.Oranges)\n", + " ax1.set_xticklabels(tokens, minor=False)\n", + " ax1.set_yticklabels(tokens, minor=False)\n", + " ax1.set_xticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax1.set_yticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax1.set_title(f\"Teacher SA\", fontsize=fs)\n", + " ax1.tick_params(axis='x', labelsize=fs)\n", + " ax1.tick_params(axis='y', labelsize=fs)\n", + " ax1.get_yaxis().set_visible(False)\n", + " ax1.get_xaxis().set_visible(False)\n", + "\n", + " heatmap = ax2.pcolor(q_prob, cmap=plt.cm.binary)\n", + " ax2.set_xticklabels(tokens, minor=False)\n", + " ax2.set_yticklabels(tokens, minor=False)\n", + " ax2.set_xticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax2.set_yticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax2.set_title(\"After Quantization(w/o QAT)\", fontsize=fs)\n", + " ax2.tick_params(axis='x', labelsize=fs)\n", + " ax2.tick_params(axis='y', labelsize=fs)\n", + " ax2.get_xaxis().set_visible(False)\n", + " ax2.get_yaxis().set_visible(False)\n", + " \n", + " heatmap = ax3.pcolor(prob_1, cmap=plt.cm.Blues)\n", + " ax3.set_xticklabels(tokens, minor=False)\n", + " ax3.set_yticklabels(tokens, minor=False)\n", + " ax3.set_xticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " 
ax3.set_yticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax3.set_title(\"Ternary QAT\", fontsize=fs)\n", + " ax3.tick_params(axis='x', labelsize=fs)\n", + " ax3.tick_params(axis='y', labelsize=fs)\n", + " ax3.get_xaxis().set_visible(False)\n", + " ax3.get_yaxis().set_visible(False)\n", + " \n", + " heatmap = ax4.pcolor(prob_3, cmap=plt.cm.Blues)\n", + " ax4.set_xticklabels(tokens, minor=False)\n", + " ax4.set_yticklabels(tokens, minor=False)\n", + " ax4.set_xticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax4.set_yticks(numpy.arange(len(tokens)+0.5), minor=False)\n", + " ax4.set_title(\"SARQ QAT\", fontsize=fs)\n", + " ax4.tick_params(axis='x', labelsize=fs)\n", + " ax4.tick_params(axis='y', labelsize=fs)\n", + " ax4.get_xaxis().set_visible(False)\n", + " ax4.get_yaxis().set_visible(False)\n", + "\n", + " # plt.tight_layout()\n", + " plt.show()\n", + " plt.close(fig)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 468, + "id": "e043f448", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([1, 12, 12, 12])" + ] + }, + "execution_count": 468, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_probs_1[0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 491, + "id": "3d0c6066", + "metadata": {}, + "outputs": [], + "source": [ + "def ranking_loss_func(student_probs, teacher_probs):\n", + " Loss_ranking = 0\n", + "\n", + " loss_ranking_list = []\n", + "\n", + " for l in tqdm(range(layer_num)):\n", + " for h in range(head_num):\n", + " student_prob_plt = student_probs[l][0,h,:,:]\n", + " teacher_prob_plt = teacher_probs[l][0,h,:,:]\n", + " Loss_ranking = 0\n", + " for h in range(seq_length):\n", + " for idx in range(0, seq_length-1):\n", + " for jdx in range(1, seq_length):\n", + " p = (student_prob_plt[h][idx] - student_prob_plt[h][jdx])*(torch.sgn(teacher_prob_plt[h][idx] - teacher_prob_plt[h][jdx]))\n", + " # print(max(0, - p.item()))\n", + " 
Loss_ranking += max(0, - p.item())\n", + " loss_ranking_list.append(Loss_ranking)\n", + " return loss_ranking_list\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33efbeb0", + "metadata": {}, + "outputs": [], + "source": [ + "torch.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 460, + "id": "5716ce81", + "metadata": {}, + "outputs": [], + "source": [ + "a = dict()\n", + "a[\"Ternary\"] = st_1\n", + "a[\"1SB\"] = st_2\n", + "a[\"2SB\"] = st_3\n", + "torch.save(a, \"sst-2_ranking_loss.pth\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 492, + "id": "5395c9d8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:02<00:00, 4.22it/s]\n", + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:02<00:00, 4.18it/s]\n", + "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:02<00:00, 4.24it/s]\n" + ] + } + ], + "source": [ + "st_1 = ranking_loss_func(student_probs_1, teacher_probs)\n", + "st_2 = ranking_loss_func(student_probs_2, teacher_probs)\n", + "st_3 = ranking_loss_func(student_probs_3, teacher_probs)" + ] + }, + { + "cell_type": "code", + "execution_count": 493, + "id": "7279f340", + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "Text(0, 0.5, 'Ranking Loss(CoLA)')" + ] + }, + "execution_count": 493, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(1, 1, figsize=(10, 5), dpi=120)\n", + "fs=20\n", + "lw=3\n", + "ax.plot(list(range(layer_num*head_num)), st_1, label=\"MSE(Ternary)\", color=\"tab:blue\", linewidth=lw, alpha=1)\n", + "ax.plot(list(range(layer_num*head_num)), st_2, label=\"KL-Div(SARQ-1step)\", color=\"orange\", linewidth=lw, alpha=0.8)\n", + "ax.plot(list(range(layer_num*head_num)), st_3, label=\"SARQ\", color=\"tab:red\", linewidth=lw, alpha=0.8)\n", + "\n", + "ax.legend(fontsize=fs, loc=1)\n", + "ax.set_xlabel(\"Head Number\", fontsize=fs)\n", + "ax.set_ylabel(f\"Ranking Loss(CoLA)\", fontsize=fs)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 322, + "id": "0c6c75fe", + "metadata": {}, + "outputs": [], + "source": [ + "magic_number = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 323, + "id": "b1fbcb83", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "ranking_dict = dict()\n", + "tc_ratio_dict = dict()\n", + "st_ratio_dict_1 = dict()\n", + "st_ratio_dict_2 = dict()\n", + "q_ratio_dict = dict()\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " ranking_dict[f\"{l}_{h}\"] = []\n", + " tc_ratio_dict[f\"{l}_{h}\"] = []\n", + " st_ratio_dict_1[f\"{l}_{h}\"] = []\n", + " st_ratio_dict_2[f\"{l}_{h}\"] = []\n", + " q_ratio_dict[f\"{l}_{h}\"] = []\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " token_avg_tc = teacher_probs[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_tc = torch.sort(token_avg_tc, stable=True, descending=True)[1].clone().detach() \n", + " ranking_dict[f\"{l}_{h}\"].append(token_order_tc[:magic_number])\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 324, + "id": "0ef6b786", + "metadata": {}, + "outputs": [], + "source": [ + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " 
token_avg_tc = teacher_probs[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_tc = torch.sort(token_avg_tc, stable=True)[1].clone().detach() \n", + " for token in ranking_dict[f\"{l}_{h}\"][0]:\n", + " ratio = torch.where(token_order_tc == token)[0] / seq_length\n", + " tc_ratio_dict[f\"{l}_{h}\"].append(ratio)\n", + "\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " token_avg_st = student_probs_1[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_st = torch.sort(token_avg_st, stable=True)[1].clone().detach() \n", + " for token in ranking_dict[f\"{l}_{h}\"][0]:\n", + " ratio = torch.where(token_order_st == token)[0] / seq_length\n", + " st_ratio_dict_1[f\"{l}_{h}\"].append(ratio)\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " token_avg_st = student_probs_2[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_st = torch.sort(token_avg_st, stable=True)[1].clone().detach() \n", + " for token in ranking_dict[f\"{l}_{h}\"][0]:\n", + " ratio = torch.where(token_order_st == token)[0] / seq_length\n", + " st_ratio_dict_2[f\"{l}_{h}\"].append(ratio)\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num):\n", + " token_avg_st = q_probs[l][0,h,:,:].mean(dim=0).clone().detach()\n", + " token_order_st = torch.sort(token_avg_st, stable=True)[1].clone().detach() \n", + " for token in ranking_dict[f\"{l}_{h}\"][0]:\n", + " ratio = torch.where(token_order_st == token)[0] / seq_length\n", + " q_ratio_dict[f\"{l}_{h}\"].append(ratio)" + ] + }, + { + "cell_type": "code", + "execution_count": 325, + "id": "17ec8118", + "metadata": {}, + "outputs": [], + "source": [ + "tc_ranking = dict()\n", + "st_ranking_1 = dict()\n", + "st_ranking_2 = dict()\n", + "q_ranking = dict()\n", + "for i in range(magic_number):\n", + " tc_ranking[f\"{i}\"] = []\n", + " st_ranking_1[f\"{i}\"] = []\n", + " st_ranking_2[f\"{i}\"] = []\n", + " q_ranking[f\"{i}\"] = []\n", + " \n", + "\n", + 
"for l in range(layer_num):\n", + " for h in range(head_num):\n", + " for i in range(magic_number):\n", + " tc_ranking[f\"{i}\"].append(tc_ratio_dict[f\"{l}_{h}\"][i])\n", + " st_ranking_1[f\"{i}\"].append(st_ratio_dict_1[f\"{l}_{h}\"][i])\n", + " st_ranking_2[f\"{i}\"].append(st_ratio_dict_2[f\"{l}_{h}\"][i])\n", + " q_ranking[f\"{i}\"].append(q_ratio_dict[f\"{l}_{h}\"][i])\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 326, + "id": "ea585c88", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, [ax1, ax2, ax3, ax4] = plt.subplots(4, 1, figsize=(14, 14), dpi=70)\n", + "\n", + "# cl = plt.cm.plasma(np.linspace(0, 1, 3))\n", + "# cl = [\"plasma\", \"b\", \"dodgerblue\"]\n", + "for i in range(magic_number):\n", + " ax1.plot(list(range(layer_num*head_num)), tc_ranking[f\"{i}\"], label=f\"ranking-{i}\", linewidth=2.5, alpha=0.8)\n", + " ax2.plot(list(range(layer_num*head_num)), st_ranking_1[f\"{i}\"], label=f\"ranking-{i}\", linewidth=2.5, alpha=0.8)\n", + " ax3.plot(list(range(layer_num*head_num)), st_ranking_2[f\"{i}\"], label=f\"ranking-{i}\", linewidth=2.5, alpha=0.8)\n", + " ax4.plot(list(range(layer_num*head_num)), q_ranking[f\"{i}\"], label=f\"ranking-{i}\", linewidth=2.5, alpha=0.8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fda6327", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "34a0dfd7", + "metadata": {}, + "source": [ + "## Model Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c515c68", + "metadata": {}, + "outputs": [], + "source": [ + "eval_st = 1\n", + "eval_tc = 1\n", + "\n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=32)\n", + "\n", + "if eval_st:\n", + " print(\"Student Model Inferece\")\n", + " student_model.eval()\n", + " student_result = do_eval(student_model, task_name, eval_dataloader, device, output_mode, eval_labels, num_labels, teacher_model=teacher_model)\n", + " print(f\"Student Result : {student_result}\")\n", + "\n", + "if eval_tc:\n", + " print(\"Teacher Model Inferece\")\n", + " teacher_result = do_eval(teacher_model, task_name, eval_dataloader, device, output_mode, eval_labels, num_labels)\n", + " print(f\"Teacher Result : {teacher_result}\")\n", + " \n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=1)\n", + "\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 220, + "id": "0eedf469", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input_ids : tensor([[ 101, 2198, 5720, 2000, 3021, 2055, 2370, 1012, 102]])\n", + "tokens : ['[CLS]', 'john', 'talked', 'to', 'bill', 'about', 'himself', '.', '[SEP]']\n", + "A : john talked to bill about himself . \n", + "B : \n", + "tensor([8])\n" + ] + } + ], + "source": [ + "# Sampling Sentence \n", + "i = 0 \n", + "# num = \n", + "num = 0\n", + "for step, batch in enumerate(eval_dataloader):\n", + " model.train()\n", + " \n", + " batch = tuple(t.to(device) for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch\n", + " i = i + 1\n", + " if i == num:\n", + " break\n", + "\n", + "seq_length = seq_lengths.item()\n", + "\n", + "input_ids_sliced = input_ids[:,:seq_length]\n", + "input_id = []\n", + "for i in input_ids_sliced[0]:\n", + " input_id.append(i.item())\n", + "tokens = tokenizer.convert_ids_to_tokens(input_id)\n", + "\n", + "\n", + "\n", + "sample_sentence_a = str()\n", + "sample_sentence_b = str()\n", + "index = 0\n", + "\n", + "for i, word in enumerate(tokens[1:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_a += word\n", + " sample_sentence_a += \" \"\n", + "index = i\n", + "\n", + "for i, word in enumerate(tokens[index+2:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_b += word\n", + " sample_sentence_b += \" \"\n", + "\n", + "sep_index = torch.where(input_ids[0] == 102)[0]\n", + "\n", + "if len(sample_sentence_b) > 1:\n", + " sample_sentence_b_start = segment_ids[0].tolist().index(1)\n", + "else:\n", + " sample_sentence_b_start = None\n", + "\n", + "print(f\"input_ids : {input_ids_sliced}\")\n", + "print(f\"tokens : {tokens}\")\n", + "print(f\"A : {sample_sentence_a}\")\n", + "print(f\"B : {sample_sentence_b}\")\n", + "print(sep_index)" + ] + }, + { + "cell_type": "markdown", + "id": "265abf87", + 
"metadata": {}, + "source": [ + "## BERTViz Model View" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "780d1da2", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from bertviz.transformers_neuron_view import BertModel, BertTokenizer\n", + "from bertviz.neuron_view import show\n", + "\n", + "bertviz_neuron_tc = 1\n", + "bertviz_neuron_st = 0\n", + "bertviz_model_tc = 0\n", + "bertviz_model_st = 0\n", + "\n", + "# Quantization Setting\n", + "if bertviz_neuron_st or bertviz_model_st:\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, (QuantizeLinear, ClipLinear)): \n", + " module.act_flag = True\n", + " module.weight_flag = True\n", + " if isinstance(module, QuantizeAct): \n", + " module.act_flag = True\n", + " module.weight_flag = True\n", + "\n", + "if bertviz_neuron_tc or bertviz_neuron_st:\n", + " if bertviz_neuron_st:\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, BertSelfAttention): \n", + " module.output_bertviz = True\n", + " if bertviz_neuron_tc:\n", + " for name, module in teacher_model.named_modules():\n", + " if isinstance(module, FP_BertSelfAttention): \n", + " module.output_bertviz = True\n", + "\n", + " model_type = 'bert'\n", + " model_version = 'bert-base-uncased'\n", + " \n", + " tokenizer = BertTokenizer.from_pretrained(model_version, do_lower_case=True)\n", + " if bertviz_neuron_tc:\n", + " if len(sample_sentence_b) > 1:\n", + " show(teacher_model.cpu(), model_type, tokenizer, sample_sentence_a, sample_sentence_b, display_mode=\"light\")\n", + " else:\n", + " show(teacher_model.cpu(), model_type, tokenizer, sample_sentence_a,display_mode=\"light\")\n", + " if bertviz_neuron_st:\n", + " if len(sample_sentence_b) > 1:\n", + " show(student_model.cpu(), model_type, tokenizer, sample_sentence_a, sample_sentence_b, display_mode=\"light\")\n", + " else:\n", + " show(student_model.cpu(), model_type, tokenizer, 
sample_sentence_a,display_mode=\"light\")\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else:\n", + " layer_num = 12\n", + " head_num = 12\n", + " \n", + "all_layers = list(range(layer_num))\n", + "layers_to_show = all_layers[20:]\n", + "\n", + "if bertviz_model_tc or bertviz_model_st:\n", + " \n", + " if bertviz_model_tc:\n", + " print(\"teacher_map\")\n", + " for name, module in teacher_model.named_modules():\n", + " if isinstance(module, FP_BertSelfAttention): \n", + " module.output_bertviz = False\n", + " teacher_model.eval()\n", + " teacher_model.to(device)\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + " model_view(teacher_probs, tokens, include_layers=layers_to_show, display_mode=\"light\")\n", + " \n", + " if bertviz_model_st:\n", + " print(\"student_map\")\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, BertSelfAttention): \n", + " module.output_bertviz = False\n", + " student_model.eval()\n", + " student_model.to(device)\n", + " student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_outputs=None)\n", + " model_view(student_probs, tokens, sample_sentence_b_start,include_layers=layers_to_show, display_mode=\"light\")# , include_layers=[0, 1])\n", + " \n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64cf6575", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from torch.nn import MSELoss\n", + "mse_func = MSELoss()\n", + "loss_cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)\n", + "norm_func = torch.linalg.norm\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else:\n", + " layer_num = 12\n", + " head_num = 12\n", + "\n", + "attention_mean_check = 0\n", + "cover_mean_check 
= 0\n", + "kl_div_check = 1\n", + "mse_check = 0\n", + "attnmap_mse_check = 0\n", + "norm_check = 0\n", + "\n", + "exclude_sep = 0\n", + "\n", + "for name, module in student_model.named_modules():\n", + " if isinstance(module, BertSelfAttention): \n", + " module.output_bertviz = False\n", + "for name, module in teacher_model.named_modules():\n", + " if isinstance(module, FP_BertSelfAttention): \n", + " module.output_bertviz = False\n", + " \n", + "for name, module in student_model.named_modules():\n", + " if isinstance(module, (QuantizeLinear, ClipLinear, QuantizeAct)): \n", + " module.act_flag = True\n", + " module.weight_flag = True\n", + "\n", + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "\n", + "student_model.eval()\n", + "teacher_model.eval()\n", + "student_model.to(device)\n", + "teacher_model.to(device)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "\n", + "if cover_mean_check:\n", + " print(\"COVER MEAN CHECK\")\n", + " top_k = 5\n", + "\n", + " for i in range(layer_num):\n", + " teacher = teacher_probs[i][0]\n", + " student = student_probs[i][0]\n", + "\n", + " head_sum = 0\n", + " for h in range(head_num):\n", + " coverage_head_sum = 0\n", + " for row in range(seq_length-1):\n", + " if exclude_sep:\n", + " tc_argsort = teacher[h][:seq_length-1,:seq_length-1].sort(descending=True)[1][row][:top_k] # top-k\n", + " st_argsort = student[h][:seq_length-1,:seq_length-1].sort(descending=True)[1][row]\n", + " tc_argsort = teacher[h].sort(descending=True)[1][row][:top_k] # top-k\n", + " st_argsort = student[h].sort(descending=True)[1][row]\n", + "\n", + " max_idx = 0\n", + " for idx in tc_argsort:\n", + " tmp = torch.where(st_argsort == idx)\n", + " max_idx = 
max(tmp[0].item(), max_idx)\n", + "\n", + " coverage_ratio = max_idx / student.shape[1]\n", + " coverage_head_sum += coverage_ratio\n", + "\n", + " # print(f\"H{h} : {coverage_head_sum/seq_length}\")\n", + "\n", + " head_sum += coverage_head_sum / seq_length\n", + " print(head_sum / head_num)\n", + "\n", + "if kl_div_check:\n", + " print(\"KL DIV CHECK\")\n", + " for i in range(layer_num):\n", + " if exclude_sep:\n", + " if len(sep_index) == 2:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000; teacher_atts[i][:,:,:,sep_index[1]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000; student_atts[i][:,:,:,sep_index[1]] = -100000\n", + " else:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000\n", + " \n", + " teacher = torch.nn.Softmax(dim=-1)(teacher_atts[i])\n", + " student = torch.nn.Softmax(dim=-1)(student_atts[i])\n", + " \n", + " student = torch.clamp_min(student, 1e-8)\n", + " teacher = torch.clamp_min(teacher, 1e-8)\n", + " else: \n", + " teacher = teacher_probs[i]\n", + " student = student_probs[i]\n", + " \n", + " neg_cross_entropy = teacher * torch.log(student) \n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " # p(t) log p(t) = negative entropy\n", + " neg_entropy = teacher * torch.log(teacher) \n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) / seq_lengths.view(-1, 1) # (b, h, s) -> (b, h)\n", + "\n", + " kld_loss = neg_entropy - neg_cross_entropy\n", + "\n", + " kld_loss_sum = torch.sum(kld_loss)\n", + " print(kld_loss_sum.item())\n", + "\n", + "if mse_check:\n", + " for i in range(layer_num):\n", + " print(mse_func(teacher_atts[i], student_atts[i]).item())\n", + " \n", + "if attnmap_mse_check:\n", + " for i in 
range(layer_num):\n", + " if exclude_sep:\n", + " if len(sep_index) == 2:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000; teacher_atts[i][:,:,:,sep_index[1]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000; student_atts[i][:,:,:,sep_index[1]] = -100000\n", + " else:\n", + " teacher_atts[i][:,:,:,sep_index[0]] = -100000\n", + " student_atts[i][:,:,:,sep_index[0]] = -100000\n", + " \n", + " teacher = torch.nn.Softmax(dim=-1)(teacher_atts[i])\n", + " student = torch.nn.Softmax(dim=-1)(student_atts[i])\n", + " print(mse_func(teacher, student).item())\n", + " else: \n", + " print(mse_func(teacher_probs[i], student_probs[i]).item())\n", + "\n", + "if norm_check:\n", + " for i in range(layer_num):\n", + " print(mse_func(torch.linalg.norm(teacher_values[i], dim=-1), torch.linalg.norm(student_values[i], dim=-1)).mean().item())\n", + " # print(loss_cos(teacher_values[i], student_values[i]).mean().item())\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "6a0039fe", + "metadata": {}, + "source": [ + "## Norm-Weight Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d95ec106", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from torch.nn import MSELoss\n", + "mse_func = MSELoss()\n", + "loss_cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)\n", + "norm_func = torch.linalg.norm\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else:\n", + " layer_num = 12\n", + " head_num = 12\n", + " \n", + "sep_w_list = [ AverageMeter() for i in range(layer_num) ]; cls_w_list = [ AverageMeter() for i in range(layer_num) ] ;punc_w_list = [ AverageMeter() for i in range(layer_num) ] ;other_w_list = [ AverageMeter() for i in range(layer_num) ]\n", + "sep_n_list = [ AverageMeter() for i in range(layer_num) ]; cls_n_list = [ AverageMeter() for i in range(layer_num) ] ;punc_n_list = [ AverageMeter() for i in range(layer_num) ] ;other_n_list = [ 
AverageMeter() for i in range(layer_num) ]\n", + "sep_v_list = [ AverageMeter() for i in range(layer_num) ]; cls_v_list = [ AverageMeter() for i in range(layer_num) ] ;punc_v_list = [ AverageMeter() for i in range(layer_num) ] ;other_v_list = [ AverageMeter() for i in range(layer_num) ]\n", + "\n", + "\n", + "\n", + "for batch in tqdm(eval_dataloader, desc=\"Inference\"):\n", + " model.train()\n", + " \n", + " batch = tuple(t.to(device) for t in batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch\n", + " seq_length = seq_lengths.item()\n", + " input_ids_sliced = input_ids[:,:seq_length]\n", + " sep_index = torch.where(input_ids[0] == 102)[0]\n", + " \n", + " student_model.eval()\n", + " teacher_model.eval()\n", + " student_model.to(device)\n", + " teacher_model.to(device)\n", + " \n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + " student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + " probs = student_probs\n", + " values = student_values\n", + " \n", + "# probs = teacher_probs\n", + "# values = teacher_values\n", + " \n", + " \n", + " \n", + "\n", + " for i in range(layer_num):\n", + " # Attention Weight Analysis\n", + " if len(sep_index) == 2:\n", + " sep_w = (probs[i][0,:,:,sep_index[0]].mean() + probs[i][0,:,:,sep_index[1]].mean() / 2).item()\n", + " punc_w = (probs[i][0,:,:,sep_index[0] - 1].mean() + probs[i][0,:,:,sep_index[1] - 1].mean() / 2).item()\n", + " sep_w_list[i].update(sep_w)\n", + " punc_w_list[i].update(punc_w)\n", + " else:\n", + " sep_w = probs[i][0,:,:,sep_index[0]].mean().item()\n", + " punc_w = probs[i][0,:,:,sep_index[0]-1].mean().item()\n", + " sep_w_list[i].update(sep_w)\n", + " punc_w_list[i].update(punc_w)\n", + " cls_w = probs[i][0,:,:,0].mean().item()\n", + " cls_w_list[i].update(cls_w)\n", + " 
other_w_list[i].update(1 - (sep_w + punc_w + cls_w))\n", + "\n", + " # Attention Norm based Analysis (|| alpha f(x) ||)\n", + " if len(sep_index) == 2:\n", + " sep_n = (norm_func(values[i][0][0,:,sep_index[0],:], dim=-1).mean() + norm_func(values[i][0][0,:,sep_index[1],:], dim=-1).mean() / 2).item()\n", + " punc_n = (norm_func(values[i][0][0,:,sep_index[0] - 1,:], dim=-1).mean() + norm_func(values[i][0][0,:,sep_index[1] - 1,:], dim=-1).mean() / 2).item()\n", + " sep_n_list[i].update(sep_n)\n", + " punc_n_list[i].update(punc_n)\n", + " else:\n", + " sep_n = norm_func(values[i][0][0,:,sep_index[0],:], dim=-1).mean().item()\n", + " punc_n = norm_func(values[i][0][0,:,sep_index[0]-1,:], dim=-1).mean().item()\n", + " sep_n_list[i].update(sep_n)\n", + " punc_n_list[i].update(punc_n)\n", + "\n", + " cls_n = norm_func(values[i][0][0,:,0,:], dim=-1).mean().item()\n", + " cls_n_list[i].update(cls_n)\n", + " \n", + " values[i][0][0,:,sep_index[0],:] = 0\n", + " if len(sep_index) == 2:\n", + " values[i][0][0,:,sep_index[1],:] = 0\n", + " \n", + " values[i][0][0,:,sep_index[0] - 1,:] = 0\n", + " if len(sep_index) == 2:\n", + " values[i][0][0,:,sep_index[1] - 1,:] = 0\n", + " \n", + " values[i][0][0,:,0,:] = 0\n", + "\n", + " shape = values[i][0][0,:,:,:].shape\n", + " if len(sep_index) == 2:\n", + " num = shape[0] * (shape[1] - 5)\n", + " else:\n", + " num = shape[0] * (shape[1] - 3)\n", + "\n", + " other_n = (norm_func(values[i][1][0,:,:,:], dim=-1).sum() / num).item()\n", + " other_n_list[i].update(other_n)\n", + "\n", + " # Attention Norm based Analysis (|| f(x) ||)\n", + " if len(sep_index) == 2:\n", + " sep_v_list[i].update((norm_func(values[i][1][0,:,sep_index[0],:], dim=-1).mean() + norm_func(values[i][1][0,:,sep_index[1],:], dim=-1).mean() / 2).item())\n", + " punc_v_list[i].update((norm_func(values[i][1][0,:,sep_index[0] - 1,:], dim=-1).mean() + norm_func(values[i][1][0,:,sep_index[1] - 1,:], dim=-1).mean() / 2).item())\n", + " else:\n", + " 
sep_v_list[i].update(norm_func(values[i][1][0,:,sep_index[0],:], dim=-1).mean().item())\n", + " punc_v_list[i].update(norm_func(values[i][1][0,:,sep_index[0]-1,:], dim=-1).mean().item())\n", + " cls_v_list[i].update(norm_func(values[i][1][0,:,0,:], dim=-1).mean().item())\n", + "\n", + " values[i][1][0,:,sep_index[0],:] = 0\n", + " if len(sep_index) == 2:\n", + " values[i][1][0,:,sep_index[1],:] = 0\n", + " \n", + " values[i][1][0,:,sep_index[0] - 1,:] = 0\n", + " if len(sep_index) == 2:\n", + " values[i][1][0,:,sep_index[1] - 1,:] = 0\n", + " values[i][1][0,:,0,:] = 0\n", + "\n", + " shape = values[i][1][0,:,:,:].shape\n", + " if len(sep_index) == 2:\n", + " num = shape[0] * (shape[1] - 5)\n", + " else:\n", + " num = shape[0] * (shape[1] - 3)\n", + "\n", + " other_v_list[i].update((norm_func(values[i][1][0,:,:,:], dim=-1).sum() / num).item())\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb671dd0", + "metadata": {}, + "outputs": [], + "source": [ + "l_sep_w_list = list(); l_cls_w_list = list() ;l_punc_w_list = list() ;l_other_w_list = list()\n", + "l_sep_n_list = list(); l_cls_n_list = list() ;l_punc_n_list = list() ;l_other_n_list = list()\n", + "l_sep_v_list = list(); l_cls_v_list = list() ;l_punc_v_list = list() ;l_other_v_list = list()\n", + "\n", + "fig, [ax1, ax2, ax3] = plt.subplots(3,1, figsize=(12,16)) \n", + "x_axis = list(range(layer_num))\n", + "\n", + "for layer in range(layer_num):\n", + " l_sep_w_list.append(sep_w_list[layer].avg); l_sep_n_list.append(sep_n_list[layer].avg); l_sep_v_list.append(sep_v_list[layer].avg)\n", + " l_cls_w_list.append(cls_w_list[layer].avg); l_cls_n_list.append(cls_n_list[layer].avg); l_cls_v_list.append(cls_v_list[layer].avg)\n", + " l_punc_w_list.append(punc_w_list[layer].avg); l_punc_n_list.append(punc_n_list[layer].avg); l_punc_v_list.append(punc_v_list[layer].avg)\n", + " l_other_w_list.append(other_w_list[layer].avg); l_other_n_list.append(other_n_list[layer].avg); 
l_other_v_list.append(other_v_list[layer].avg) \n", + "\n", + "ax1.set_title(\"Attention Weight-based Analysis\")\n", + "ax1.plot(x_axis, l_sep_w_list, label=\"SEP\", linewidth=3)\n", + "ax1.plot(x_axis, l_cls_w_list, label=\"CLS\", linewidth=3)\n", + "ax1.plot(x_axis, l_punc_w_list, label=\". or ,\", linewidth=3)\n", + "ax1.plot(x_axis, l_other_w_list, label=\"Other\", linewidth=3)\n", + "ax1.legend()\n", + "\n", + "ax2.set_title(\"Attention Norm-based Analysis\")\n", + "ax2.plot(x_axis, l_sep_n_list, label=\"SEP\", linewidth=3)\n", + "ax2.plot(x_axis, l_cls_n_list, label=\"CLS\", linewidth=3)\n", + "ax2.plot(x_axis, l_punc_n_list, label=\". or ,\", linewidth=3)\n", + "ax2.plot(x_axis, l_other_n_list, label=\"Other\", linewidth=3)\n", + "ax2.legend()\n", + "\n", + "ax3.set_title(\"Attention Value-Norm-based Analysis\")\n", + "ax3.plot(x_axis, l_sep_v_list, label=\"SEP\", linewidth=3)\n", + "ax3.plot(x_axis, l_cls_v_list, label=\"CLS\", linewidth=3)\n", + "ax3.plot(x_axis, l_punc_v_list, label=\". 
or ,\", linewidth=3)\n", + "ax3.plot(x_axis, l_other_v_list, label=\"Other\", linewidth=3)\n", + "ax3.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "138ef071", + "metadata": {}, + "source": [ + "### Weight - Norm Based Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5ee6ace", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "\n", + "student_model.eval()\n", + "teacher_model.eval()\n", + "student_model.to(device)\n", + "teacher_model.to(device)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "\n", + "probs = student_probs\n", + "values = student_values\n", + "# probs = teacher_probs\n", + "# values = teacher_values\n", + "\n", + "table_sep = [[0] * head_num for i in range(layer_num)]\n", + "table_cls = [[0] * head_num for i in range(layer_num)]\n", + "table_punc = [[0] * head_num for i in range(layer_num)]\n", + "table_other = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num): \n", + " prob_sep = ((probs[l][:,h,:,sep_index[0]].mean() + probs[l][:,h,:,sep_index[1]].mean())/2).item()\n", + " table_sep[l][h] = prob_sep\n", + " prob_cls = probs[l][:,h,:,0].mean().item()\n", + " table_cls[l][h] = prob_cls\n", + " prob_punc = ((probs[l][:,h,:,sep_index[0]-1].mean() + probs[l][:,h,:,sep_index[1]-1].mean())/2).item()\n", + " table_punc[l][h] = prob_punc\n", + "# prob_other = 1 - (prob_sep + prob_sep + prob_punc)\n", + "# table_other[l][h] = prob_other\n", + "\n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "ax1.set_xlabel(\"head\", fontsize=30); 
ax1.set_ylabel(\"layer\", fontsize=30)\n", + "# ax1.set_title(\"SEP Probability AVG\")\n", + "heatmap = ax1.pcolor(table_sep, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax1)\n", + "\n", + "ax2.set_xlabel(\"head\", fontsize=36); ax2.set_ylabel(\"layer\", fontsize=36)\n", + "# ax2.set_title(\"CLS Probability AVG\")\n", + "heatmap = ax2.pcolor(table_cls, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax2)\n", + "\n", + "ax3.set_xlabel(\"head\", fontsize=36); ax3.set_ylabel(\"layer\", fontsize=36)\n", + "# ax3.set_title(\"PUNC Probability AVG\")\n", + "heatmap = ax3.pcolor(table_punc, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax3)\n", + "plt.show()\n", + "# ax4.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# ax4.set_title(\"Other Probability AVG\")\n", + "# heatmap = ax4.pcolor(table_other, cmap=plt.cm.Blues)\n", + "\n", + "\n", + "norm_type = 0 # 0 : f(x) 1 : p*f(x)\n", + "table_sep = [[0] * head_num for i in range(layer_num)]\n", + "table_cls = [[0] * head_num for i in range(layer_num)]\n", + "table_punc = [[0] * head_num for i in range(layer_num)]\n", + "# table_other = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num): \n", + " norm_sep = (norm_func(values[l][norm_type][0,h,sep_index[0],:], dim=-1).mean() + norm_func(values[l][norm_type][0,h,sep_index[1],:], dim=-1).mean() / 2).item()\n", + " table_sep[l][h] = norm_sep\n", + " norm_cls = norm_func(values[l][norm_type][0,h,0,:], dim=-1).mean().item()\n", + " table_cls[l][h] = norm_cls\n", + " norm_punc = (norm_func(values[l][norm_type][0,h,sep_index[0] - 1,:], dim=-1).mean() + norm_func(values[l][norm_type][0,h,sep_index[1] - 1,:], dim=-1).mean() / 2).item()\n", + " table_punc[l][h] = norm_punc\n", + "# prob_other = 1 - (prob_sep + prob_sep + prob_punc)\n", + "# table_other[l][h] = prob_other\n", + "\n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "# fig, ax = plt.subplots(1, 1, figsize=(8,8))\n", + 
"ax1.set_xlabel(\"head\", fontsize = 28); ax.set_ylabel(\"layer\", fontsize = 28)\n", + "ax1.set_title(\"SEP ||f(x)|| Norm AVG\")\n", + "heatmap = ax1.pcolor(table_sep, cmap=plt.cm.Blues)\n", + "# fig.savefig(\"2SB.png\")\n", + "fig.colorbar(heatmap, ax=ax1)\n", + "\n", + "ax2.set_xlabel(\"head\", fontsize = 28); ax2.set_ylabel(\"layer\", fontsize = 28)\n", + "ax2.set_title(\"CLS ||f(x)|| Norm AVG\")\n", + "heatmap = ax2.pcolor(table_cls, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax2)\n", + "\n", + "ax3.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax3.set_title(\"PUNC ||f(x)|| Norm AVG\")\n", + "heatmap = ax3.pcolor(table_punc, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax3)\n", + "plt.show()\n", + "\n", + "\n", + "norm_type = 1 # 0 : f(x) 1 : p*f(x)\n", + "table_sep = [[0] * head_num for i in range(layer_num)]\n", + "table_cls = [[0] * head_num for i in range(layer_num)]\n", + "table_punc = [[0] * head_num for i in range(layer_num)]\n", + "# table_other = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num): \n", + " norm_sep = (norm_func(values[l][norm_type][0,h,sep_index[0],:], dim=-1).mean() + norm_func(values[l][norm_type][0,h,sep_index[1],:], dim=-1).mean() / 2).item()\n", + " table_sep[l][h] = norm_sep\n", + " norm_cls = norm_func(values[l][norm_type][0,h,0,:], dim=-1).mean().item()\n", + " table_cls[l][h] = norm_cls\n", + " norm_punc = (norm_func(values[l][norm_type][0,h,sep_index[0] - 1,:], dim=-1).mean() + norm_func(values[l][norm_type][0,h,sep_index[1] - 1,:], dim=-1).mean() / 2).item()\n", + " table_punc[l][h] = norm_punc\n", + "# prob_other = 1 - (prob_sep + prob_sep + prob_punc)\n", + "# table_other[l][h] = prob_other\n", + "\n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "ax1.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax1.set_title(\"SEP ||p*f(x)|| Norm AVG\")\n", + "heatmap = ax1.pcolor(table_sep, cmap=plt.cm.Blues)\n", + 
"fig.colorbar(heatmap, ax=ax1)\n", + "\n", + "ax2.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax2.set_title(\"CLS ||p*f(x)|| Norm AVG\")\n", + "heatmap = ax2.pcolor(table_cls, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax2)\n", + "\n", + "ax3.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax3.set_title(\"PUNC ||p*f(x)|| Norm AVG\")\n", + "heatmap = ax3.pcolor(table_punc, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax3)\n", + "plt.show()\n", + "# ax4.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# ax4.set_title(\"Other Probability AVG\")\n", + "# heatmap = ax4.pcolor(table_other, cmap=plt.cm.Blues)" + ] + }, + { + "cell_type": "markdown", + "id": "c4722d72", + "metadata": {}, + "source": [ + "### Norm Based Cosine Similarity Comparison" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "57688e44", + "metadata": {}, + "outputs": [], + "source": [ + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "\n", + "student_model.eval()\n", + "teacher_model.eval()\n", + "student_model.to(device)\n", + "teacher_model.to(device)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "st_values = student_values\n", + "tc_values = teacher_values\n", + "\n", + "loss_cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)\n", + "\n", + "norm_type = 0 # 0 : f(x) 1 : p*f(x)\n", + "\n", + "table_sep = [[0] * head_num for i in range(layer_num)]\n", + "table_cls = [[0] * head_num for i in range(layer_num)]\n", + "table_punc = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num): \n", + " # cos_sep_1 = loss_cos(st_values[l][norm_type][0,h,sep_index[0],:], 
tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " # cos_sep_2 = loss_cos(st_values[l][norm_type][0,h,sep_index[1],:], tc_values[l][norm_type][0,h,sep_index[1],:]) \n", + " cos_sep_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " cos_sep_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1],:], tc_values[l][norm_type][0,h,sep_index[1],:]) \n", + " # table_sep[l][h] = (1-((cos_sep_1 + cos_sep_2) / 2)).item()\n", + " table_sep[l][h] = ((cos_sep_1 + cos_sep_2) / 2).item()\n", + " \n", + " # cos_cls = loss_cos(st_values[l][norm_type][0,h,0,:], tc_values[l][norm_type][0,h,0,:])\n", + " cos_cls = mse_func(st_values[l][norm_type][0,h,0,:], tc_values[l][norm_type][0,h,0,:])\n", + " # table_cls[l][h] = (1-cos_cls).item()\n", + " table_cls[l][h] = (cos_cls).item()\n", + " \n", + " # cos_punc_1 = loss_cos(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:]) \n", + " # cos_punc_2 = loss_cos(st_values[l][norm_type][0,h,sep_index[1]-1,:], tc_values[l][norm_type][0,h,sep_index[1]-1,:]) \n", + " cos_punc_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:]) \n", + " cos_punc_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1]-1,:], tc_values[l][norm_type][0,h,sep_index[1]-1,:]) \n", + " # table_punc[l][h] = (1-((cos_sep_1 + cos_sep_2) / 2)).item()\n", + " table_punc[l][h] = ((cos_sep_1 + cos_sep_2) / 2).item()\n", + "\n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "ax1.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax1.set_title(\"SEP Cosine Similarity w/ Teacher\")\n", + "heatmap = ax1.pcolor(table_sep, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax1)\n", + "\n", + "ax2.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax2.set_title(\"CLS Cosine Similarity w/ Teacher\")\n", + "heatmap = ax2.pcolor(table_cls, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax2)\n", + "\n", + 
"ax3.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax3.set_title(\"PUNC Cosine Similarity w/ Teacher\")\n", + "heatmap = ax3.pcolor(table_punc, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax3)\n", + "# plt.show()\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f31801d", + "metadata": {}, + "outputs": [], + "source": [ + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "\n", + "student_model.eval()\n", + "teacher_model.eval()\n", + "student_model.to(device)\n", + "teacher_model.to(device)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "st_values = student_values\n", + "tc_values = teacher_values\n", + "\n", + "loss_cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)\n", + "\n", + "norm_type = 1 # 0 : f(x) 1 : p*f(x)\n", + "\n", + "table_sep = [[0] * head_num for i in range(layer_num)]\n", + "table_cls = [[0] * head_num for i in range(layer_num)]\n", + "table_punc = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "for l in range(layer_num):\n", + " for h in range(head_num): \n", + " cos_sep_1 = loss_cos(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " cos_sep_2 = loss_cos(st_values[l][norm_type][0,h,sep_index[1],:], tc_values[l][norm_type][0,h,sep_index[1],:]) \n", + " table_sep[l][h] = (1-((cos_sep_1 + cos_sep_2) / 2)).item()\n", + " \n", + " cos_cls = loss_cos(st_values[l][norm_type][0,h,0,:], tc_values[l][norm_type][0,h,0,:])\n", + " table_cls[l][h] = (1-cos_cls).item()\n", + " \n", + " cos_punc_1 = loss_cos(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:]) \n", + " cos_punc_2 = 
loss_cos(st_values[l][norm_type][0,h,sep_index[1]-1,:], tc_values[l][norm_type][0,h,sep_index[1]-1,:]) \n", + " table_punc[l][h] = (1-((cos_sep_1 + cos_sep_2) / 2)).item()\n", + "\n", + "fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(32,8))\n", + "ax1.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax1.set_title(\"SEP Cosine Similarity w/ Teacher\")\n", + "heatmap = ax1.pcolor(table_sep, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax1)\n", + "\n", + "ax2.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax2.set_title(\"CLS Cosine Similarity w/ Teacher\")\n", + "heatmap = ax2.pcolor(table_cls, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax2)\n", + "\n", + "ax3.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "ax3.set_title(\"PUNC Cosine Similarity w/ Teacher\")\n", + "heatmap = ax3.pcolor(table_punc, cmap=plt.cm.Blues)\n", + "fig.colorbar(heatmap, ax=ax3)\n", + "plt.show()\n", + "\n", + "# # ax4.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# # ax4.set_title(\"Other Probability AVG\")\n", + "# # heatmap = ax4.pcolor(table_other, cmap=plt.cm.Blues)\n", + "\n", + "# norm_type = 1 # 0 : f(x) 1 : p*f(x)\n", + "# table_sep = [[0] * head_num for i in range(layer_num)]\n", + "# table_cls = [[0] * head_num for i in range(layer_num)]\n", + "# table_punc = [[0] * head_num for i in range(layer_num)]\n", + "# # table_other = [[0] * head_num for i in range(layer_num)]\n", + "\n", + "# for l in range(layer_num):\n", + "# for h in range(head_num): \n", + "# norm_sep = (norm_func(values[l][norm_type][0,h,sep_index[0],:], dim=-1).mean() + norm_func(values[l][norm_type][0,h,sep_index[1],:], dim=-1).mean() / 2).item()\n", + "# table_sep[l][h] = norm_sep\n", + "# norm_cls = norm_func(values[l][norm_type][0,h,0,:], dim=-1).mean().item()\n", + "# table_cls[l][h] = norm_cls\n", + "# norm_punc = (norm_func(values[l][norm_type][0,h,sep_index[0] - 1,:], dim=-1).mean() + norm_func(values[l][norm_type][0,h,sep_index[1] - 1,:], dim=-1).mean() / 
2).item()\n", + "# table_punc[l][h] = norm_punc\n", + "# # prob_other = 1 - (prob_sep + prob_sep + prob_punc)\n", + "# # table_other[l][h] = prob_other\n", + "\n", + "# fig, [ax1,ax2,ax3] = plt.subplots(1, 3, figsize=(24,8))\n", + "# ax1.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# ax1.set_title(\"SEP ||p*f(x)|| Norm AVG\")\n", + "# heatmap = ax1.pcolor(table_sep, cmap=plt.cm.Blues)\n", + "\n", + "# ax2.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# ax2.set_title(\"CLS ||p*f(x)|| Norm AVG\")\n", + "# heatmap = ax2.pcolor(table_cls, cmap=plt.cm.Blues)\n", + "\n", + "# ax3.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# ax3.set_title(\"PUNC ||p*f(x)|| Norm AVG\")\n", + "# heatmap = ax3.pcolor(table_punc, cmap=plt.cm.Blues)\n", + "# plt.show()\n", + "# # ax4.set_xlabel(\"head\"); ax1.set_ylabel(\"layer\")\n", + "# # ax4.set_title(\"Other Probability AVG\")\n", + "# # heatmap = ax4.pcolor(table_other, cmap=plt.cm.Blues)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcd90fce", + "metadata": {}, + "outputs": [], + "source": [ + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "\n", + "norm_type = 0 # 1 : f(x) 0 : p*f(x)\n", + "\n", + "student_model.eval()\n", + "teacher_model.eval()\n", + "student_model.to(device)\n", + "teacher_model.to(device)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "\n", + " \n", + "# diff_sep_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + "# diff_sep_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1],:], tc_values[l][norm_type][0,h,sep_index[1],:]) \n", + "# sep_list.append(((diff_sep_1 + diff_sep_2) / 2).item())\n", + " \n", + 
"# diff_cls = mse_func(st_values[l][norm_type][0,h,0,:], tc_values[l][norm_type][0,h,0,:])\n", + "# cls_list.append((diff_cls).item())\n", + " \n", + "# diff_punc_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:])\n", + "# diff_punc_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1]-1,:], tc_values[l][norm_type][0,h,sep_index[1]-1,:]) \n", + "# punc_list.append(((diff_punc_1 + diff_punc_2) / 2).item())\n", + " \n", + "# st_values[l][norm_type][0,h,sep_index[0],:] = 0\n", + "# tc_values[l][norm_type][0,h,sep_index[0],:] = 0 \n", + "# st_values[l][norm_type][0,h,sep_index[1],:] = 0\n", + "# tc_values[l][norm_type][0,h,sep_index[1],:] = 0\n", + " \n", + "# st_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + "# tc_values[l][norm_type][0,h,sep_index[0]-1,:] = 0 \n", + "# st_values[l][norm_type][0,h,sep_index[1]-1,:] = 0\n", + "# tc_values[l][norm_type][0,h,sep_index[1]-1,:] = 0\n", + " \n", + "# st_values[l][norm_type][0,h,0,:] = 0\n", + "# tc_values[l][norm_type][0,h,0,:] = 0\n", + " \n", + " # diff_other = mse_func(st_values[l][norm_type][0,h,:,:], tc_values[l][norm_type][0,h,:,:])\n", + " # other_list.append(diff_other.item())\n", + "\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c00e0245", + "metadata": {}, + "outputs": [], + "source": [ + "ternary_prob_kl = kld_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afe5724e", + "metadata": {}, + "outputs": [], + "source": [ + "sarq_c_prob_kl = kld_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80e66cb3", + "metadata": {}, + "outputs": [], + "source": [ + "sarq_prob_kl = kld_list" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0055a968", + "metadata": {}, + "outputs": [], + "source": [ + "eval_st = 1\n", + "eval_tc = 0\n", + "\n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=32)\n", + "\n", + "if eval_st:\n", 
+ " print(\"Student Model Inferece\")\n", + " student_model.eval()\n", + " student_result = do_eval(student_model, task_name, eval_dataloader, device, output_mode, eval_labels, num_labels, teacher_model=teacher_model)\n", + " print(f\"Student Result : {student_result}\")\n", + "\n", + "if eval_tc:\n", + " print(\"Teacher Model Inferece\")\n", + " teacher_result = do_eval(teacher_model, task_name, eval_dataloader, device, output_mode, eval_labels, num_labels)\n", + " print(f\"Teacher Result : {teacher_result}\")\n", + " \n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=1)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e82f5612", + "metadata": {}, + "outputs": [], + "source": [ + "seed=42\n", + "random.seed(seed)\n", + "np.random.seed(seed)\n", + "torch.manual_seed(seed)\n", + "mse_func = MSELoss()\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "student_model_dir = os.path.join(model_dir,task_name)\n", + "\n", + "# st_model_name = \"ternary_save\"\n", + "# st_model_name = \"step_2_context\"\n", + "# st_model_name = \"step_2_output\"\n", + "# st_model_name = \"step_2\"\n", + "# st_model_name = \"sarq_step1\"\n", + "\n", + "\n", + "\n", + "build_tc = 1\n", + "build_st = 1\n", + "\n", + "if build_tc:\n", + " # Teacher Model Build\n", + " teacher_model_dir = os.path.join(model_dir,task_name)\n", + " teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + " teacher_model.to(device)\n", + " teacher_model.eval()\n", + " model = teacher_model\n", + " \n", + "for st_model_name in [\"1SB_S\", \"1SB_S_M\", \"step_2_S_M\"]:\n", + " student_model_dir = os.path.join(output_dir, 
task_name, \"exploration\", st_model_name) \n", + "\n", + " if build_st:\n", + " # Student Model Build\n", + " student_config = BertConfig.from_pretrained(student_model_dir\n", + "# quantize_act=True,\n", + "# quantize_weight=True,\n", + "# weight_bits = 2, # Always Ternary when \"quantize_weight = True\"\n", + "# input_bits = 8,\n", + "# clip_val = 2.5,\n", + "# quantize = True,\n", + "# ffn_q_1 = True,\n", + "# ffn_q_2 = True,\n", + "# qkv_q = True,\n", + "# emb_q = True,\n", + "# cls_q = True,\n", + "# clipping = False,\n", + "# layer_num = -1,\n", + "# mean_scale = 0.7,\n", + "# quantizer = \"ternary\",\n", + "# act_quantizer = \"ternary\",\n", + "# init_scaling = 1,\n", + "# clip_ratio = 1,\n", + "# gradient_scaling = False,\n", + "# clip_method = \"minmax\",\n", + "# teacher_attnmap = False,\n", + "# parks = False,\n", + "# stop_grad = False,\n", + "# qk_FP = True,\n", + "# map=False,\n", + "# act_method = \"clipping\"\n", + " )\n", + "\n", + " student_model = QuantBertForSequenceClassification.from_pretrained(student_model_dir, config = student_config, num_labels=num_labels)\n", + " student_model.to(device)\n", + " model = student_model\n", + " print()\n", + "\n", + " # Quantization Option ACT/WEIGHT\n", + " for name, module in student_model.named_modules():\n", + " if isinstance(module, (QuantizeLinear, QuantizeAct, ClipLinear)): \n", + " module.act_flag = True\n", + " module.weight_flag = True\n", + "\n", + " student_model.eval()\n", + " teacher_model.eval()\n", + " student_model.to(device)\n", + " teacher_model.to(device)\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_zip = teacher_model(input_ids_sliced.to(device))\n", + " logits, loss, cls_loss, rep_loss, output_loss, attmap_loss, attscore_loss, coeff_list, student_zip = student_model(input_ids_sliced.to(device), teacher_outputs=None)\n", + " \n", + " \n", + " kld_list = []\n", + "\n", + " student_probs = student_zip[1]\n", + " kl_loss = 
torch.nn.KLDivLoss(reduction=\"sum\")\n", + "\n", + " for l in range(layer_num):\n", + " for h in range(head_num): \n", + " student = student_probs[l][0,h,:,:]\n", + " teacher = teacher_probs[l][0,h,:,:]\n", + " neg_cross_entropy = teacher * torch.log(student) \n", + " neg_cross_entropy = torch.sum(neg_cross_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + "\n", + "\n", + " # p(t) log p(t) = negative entropy\n", + " neg_entropy = teacher * torch.log(teacher) \n", + " neg_entropy = torch.sum(neg_entropy, dim=-1) # (b, h, s, s) -> (b, h, s)\n", + "\n", + " kl_div = neg_entropy - neg_cross_entropy\n", + " # print(kl_div.mean().item())\n", + " kld_list.append(kl_div.mean().item())\n", + " \n", + " if st_model_name == \"1SB_S\":\n", + " print(st_model_name)\n", + " t_kld_list = kld_list\n", + " elif st_model_name == \"1SB_S_M\":\n", + " print(st_model_name) \n", + " s_kld_list = kld_list\n", + " elif st_model_name == \"step_2_S_M\":\n", + " print(st_model_name)\n", + " sc_kld_list = kld_list \n", + " \n", + " \n", + " \n", + " \n", + " # (layer_context, attention_output, value_layer, self_output_hs)\n", + " st_values = student_zip[0]\n", + " tc_values = teacher_zip\n", + "\n", + " norm_type = 2 \n", + "\n", + " h_num = 1\n", + " sep_list = []\n", + " cls_list = []\n", + " punc_list = []\n", + " other_list = []\n", + "\n", + " for l in range(layer_num):\n", + " for h in range(head_num): \n", + "\n", + " if len(sep_index) == 2:\n", + " diff_sep_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " diff_sep_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1],:], tc_values[l][norm_type][0,h,sep_index[1],:]) \n", + " sep_list.append(((diff_sep_1 + diff_sep_2) / 2).item())\n", + " else:\n", + " diff_sep = mse_func(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " sep_list.append(diff_sep.item())\n", + "\n", + " diff_cls = mse_func(st_values[l][norm_type][0,h,0,:], 
tc_values[l][norm_type][0,h,0,:])\n", + " cls_list.append((diff_cls).item())\n", + "\n", + " if len(sep_index) == 2:\n", + " diff_punc_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:])\n", + " diff_punc_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1]-1,:], tc_values[l][norm_type][0,h,sep_index[1]-1,:]) \n", + " punc_list.append(((diff_punc_1 + diff_punc_2) / 2).item())\n", + " else:\n", + " diff_punc = mse_func(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:])\n", + " punc_list.append((diff_punc).item())\n", + "\n", + " if len(sep_index) == 2:\n", + " st_values[l][norm_type][0,h,sep_index[0],:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0],:] = 0 \n", + " st_values[l][norm_type][0,h,sep_index[1],:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[1],:] = 0\n", + " else:\n", + " st_values[l][norm_type][0,h,sep_index[0],:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0],:] = 0 \n", + "\n", + " if len(sep_index) == 2: \n", + " st_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0]-1,:] = 0 \n", + " st_values[l][norm_type][0,h,sep_index[1]-1,:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[1]-1,:] = 0\n", + " else:\n", + " st_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + "\n", + " st_values[l][norm_type][0,h,0,:] = 0\n", + " tc_values[l][norm_type][0,h,0,:] = 0\n", + "\n", + " diff_other = mse_func(st_values[l][norm_type][0,h,:,:], tc_values[l][norm_type][0,h,:,:])\n", + " other_list.append(diff_other.item())\n", + "\n", + "\n", + " if st_model_name == \"1SB_S\":\n", + " print(st_model_name)\n", + " v_t_sep = sep_list \n", + " v_t_cls = cls_list \n", + " v_t_punc = punc_list \n", + " v_t_other = other_list \n", + " elif st_model_name == \"1SB_S_M\":\n", + " print(st_model_name) \n", + " v_s_sep = sep_list \n", + " v_s_cls = cls_list \n", + 
" v_s_punc = punc_list \n", + " v_s_other = other_list \n", + " elif st_model_name == \"step_2_S_M\":\n", + " print(st_model_name)\n", + " v_sc_sep = sep_list \n", + " v_sc_cls = cls_list \n", + " v_sc_punc = punc_list \n", + " v_sc_other = other_list \n", + " elif st_model_name == \"step_2_output\" or st_model_name == \"sarq_step1\": \n", + " print(st_model_name)\n", + " v_so_sep = sep_list \n", + " v_so_cls = cls_list \n", + " v_so_punc = punc_list \n", + " v_so_other = other_list \n", + "\n", + " norm_type = 0 # 1 : Attention Output 0 : Layer Context\n", + "\n", + " h_num = 1\n", + " sep_list = []\n", + " cls_list = []\n", + " punc_list = []\n", + " other_list = []\n", + "\n", + " tokens\n", + "# for l in range(layer_num):\n", + "# if len(sep_index) == 2:\n", + "# diff_sep_1 = mse_func(st_values[l][norm_type][0,sep_index[0],:], tc_values[l][norm_type][0,sep_index[0],:]) \n", + "# diff_sep_2 = mse_func(st_values[l][norm_type][0,sep_index[1],:], tc_values[l][norm_type][0,sep_index[1],:]) \n", + "# sep_list.append(((diff_sep_1 + diff_sep_2) / 2).item())\n", + "# else:\n", + "# diff_sep = mse_func(st_values[l][norm_type][0,sep_index[0],:], tc_values[l][norm_type][0,sep_index[0],:]) \n", + "# sep_list.append(diff_sep.item())\n", + "\n", + "# diff_cls = mse_func(st_values[l][norm_type][0,0,:], tc_values[l][norm_type][0,0,:])\n", + "# cls_list.append((diff_cls).item())\n", + "\n", + "# if len(sep_index) == 2:\n", + "# diff_punc_1 = mse_func(st_values[l][norm_type][0,sep_index[0]-1,:], tc_values[l][norm_type][0,sep_index[0]-1,:])\n", + "# diff_punc_2 = mse_func(st_values[l][norm_type][0,sep_index[1]-1,:], tc_values[l][norm_type][0,sep_index[1]-1,:]) \n", + "# punc_list.append(((diff_punc_1 + diff_punc_2) / 2).item())\n", + "# else:\n", + "# diff_punc = mse_func(st_values[l][norm_type][0,sep_index[0]-1,:], tc_values[l][norm_type][0,sep_index[0]-1,:])\n", + "# punc_list.append((diff_punc).item())\n", + "\n", + "# if len(sep_index) == 2:\n", + "# 
st_values[l][norm_type][0,sep_index[0],:] = 0\n", + "# tc_values[l][norm_type][0,sep_index[0],:] = 0 \n", + "# st_values[l][norm_type][0,sep_index[1],:] = 0\n", + "# tc_values[l][norm_type][0,sep_index[1],:] = 0\n", + "# else:\n", + "# st_values[l][norm_type][0,sep_index[0],:] = 0\n", + "# tc_values[l][norm_type][0,sep_index[0],:] = 0 \n", + "\n", + "# if len(sep_index) == 2: \n", + "# st_values[l][norm_type][0,sep_index[0]-1,:] = 0\n", + "# tc_values[l][norm_type][0,sep_index[0]-1,:] = 0 \n", + "# st_values[l][norm_type][0,sep_index[1]-1,:] = 0\n", + "# tc_values[l][norm_type][0,sep_index[1]-1,:] = 0\n", + "# else:\n", + "# st_values[l][norm_type][0,sep_index[0]-1,:] = 0\n", + "# tc_values[l][norm_type][0,sep_index[0]-1,:] = 0\n", + "\n", + "# st_values[l][norm_type][0,0,:] = 0\n", + "# tc_values[l][norm_type][0,0,:] = 0\n", + "\n", + "# diff_other = mse_func(st_values[l][norm_type][0,:,:], tc_values[l][norm_type][0,:,:])\n", + "# other_list.append(diff_other.item())\n", + "\n", + " for l in range(layer_num):\n", + " for h in range(head_num): \n", + "\n", + " if len(sep_index) == 2:\n", + " diff_sep_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " diff_sep_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1],:], tc_values[l][norm_type][0,h,sep_index[1],:]) \n", + " sep_list.append(((diff_sep_1 + diff_sep_2) / 2).item())\n", + " else:\n", + " diff_sep = mse_func(st_values[l][norm_type][0,h,sep_index[0],:], tc_values[l][norm_type][0,h,sep_index[0],:]) \n", + " sep_list.append(diff_sep.item())\n", + "\n", + " diff_cls = mse_func(st_values[l][norm_type][0,h,0,:], tc_values[l][norm_type][0,h,0,:])\n", + " cls_list.append((diff_cls).item())\n", + "\n", + " if len(sep_index) == 2:\n", + " diff_punc_1 = mse_func(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:])\n", + " diff_punc_2 = mse_func(st_values[l][norm_type][0,h,sep_index[1]-1,:], 
tc_values[l][norm_type][0,h,sep_index[1]-1,:]) \n", + " punc_list.append(((diff_punc_1 + diff_punc_2) / 2).item())\n", + " else:\n", + " diff_punc = mse_func(st_values[l][norm_type][0,h,sep_index[0]-1,:], tc_values[l][norm_type][0,h,sep_index[0]-1,:])\n", + " punc_list.append((diff_punc).item())\n", + "\n", + " if len(sep_index) == 2:\n", + " st_values[l][norm_type][0,h,sep_index[0],:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0],:] = 0 \n", + " st_values[l][norm_type][0,h,sep_index[1],:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[1],:] = 0\n", + " else:\n", + " st_values[l][norm_type][0,h,sep_index[0],:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0],:] = 0 \n", + "\n", + " if len(sep_index) == 2: \n", + " st_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0]-1,:] = 0 \n", + " st_values[l][norm_type][0,h,sep_index[1]-1,:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[1]-1,:] = 0\n", + " else:\n", + " st_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + " tc_values[l][norm_type][0,h,sep_index[0]-1,:] = 0\n", + "\n", + " st_values[l][norm_type][0,h,0,:] = 0\n", + " tc_values[l][norm_type][0,h,0,:] = 0\n", + "\n", + " diff_other = mse_func(st_values[l][norm_type][0,h,:,:], tc_values[l][norm_type][0,h,:,:])\n", + " other_list.append(diff_other.item())\n", + "\n", + " if st_model_name == \"1SB_S\":\n", + " print(st_model_name)\n", + " vw_t_sep = sep_list \n", + " vw_t_cls = cls_list \n", + " vw_t_punc = punc_list \n", + " vw_t_other = other_list \n", + " elif st_model_name == \"1SB_S_M\":\n", + " print(st_model_name)\n", + " vw_s_sep = sep_list \n", + " vw_s_cls = cls_list \n", + " vw_s_punc = punc_list \n", + " vw_s_other = other_list \n", + " elif st_model_name == \"step_2_S_M\":\n", + " print(st_model_name)\n", + " vw_sc_sep = sep_list \n", + " vw_sc_cls = cls_list \n", + " vw_sc_punc = punc_list \n", + " vw_sc_other = other_list \n", + " elif st_model_name == \"step_2_output\" or st_model_name 
== \"sarq_step1\": \n", + " print(st_model_name)\n", + " vw_so_sep = sep_list \n", + " vw_so_cls = cls_list \n", + " vw_so_punc = punc_list \n", + " vw_so_other = other_list \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a16b4e61", + "metadata": {}, + "outputs": [], + "source": [ + "st_values[0][0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cee053f1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b7999f0", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import matplotlib.ticker as mtick\n", + "fig, ax2 = plt.subplots(1,1, figsize=(8, 5.5), dpi=70)\n", + "plt.rcParams['axes.linewidth'] = 2.2\n", + "plt.rcParams['patch.linewidth'] = 2.2\n", + "\n", + "font_size = 23\n", + "line_w =1.5\n", + "\n", + "x_axis_num = layer_num * head_num\n", + "\n", + "# ax1.plot(list(range(x_axis_num)),v_t_cls, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),v_sc_cls, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),v_s_cls, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# # ax1.plot(list(range(x_axis_num)),v_so_cls, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "# ax1.tick_params(axis=\"x\", labelsize=font_size)\n", + "# ax1.tick_params(axis=\"y\", labelsize=font_size)\n", + "# ax1.legend(fontsize=font_size, loc=1)\n", + "# ax1.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax1.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))\n", + "# # ax1.set_title(\"CLS Value Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "\n", + "ax2.plot(list(range(x_axis_num)),v_t_sep, label=\"Ternary\", color='r', linewidth=line_w)\n", + "ax2.plot(list(range(x_axis_num)),v_s_sep, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "ax2.plot(list(range(x_axis_num)),v_sc_sep, label=\"SARQ\", color='b', 
linewidth=line_w)\n", + "# ax2.plot(list(range(x_axis_num)),v_so_sep, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "ax2.legend(fontsize=font_size, loc=1)\n", + "ax2.set_xlabel(\"Head\", fontsize=font_size)\n", + "ax2.set_ylabel(\"MSE\", fontsize=font_size)\n", + "ax2.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))\n", + "# ax2.set_title(\"SEP Value Vector \", fontsize=font_size, fontweight=\"light\")\n", + "ax2.tick_params(axis=\"x\", labelsize=font_size)\n", + "ax2.tick_params(axis=\"y\", labelsize=font_size)\n", + "\n", + "# ax3.plot(list(range(x_axis_num)),v_t_punc, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax3.plot(list(range(x_axis_num)),v_s_punc, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "# ax3.plot(list(range(x_axis_num)),v_sc_punc, label=\"SARQ_C\", color='b', linewidth=line_w)\n", + "# # ax3.plot(list(range(x_axis_num)),v_so_punc, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "# ax3.legend(fontsize=font_size, loc=1)\n", + "# ax3.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax3.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))\n", + "# # ax3.set_title(\"PUNC Value Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "# ax4.plot(list(range(x_axis_num)),v_t_other, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax4.plot(list(range(x_axis_num)),v_s_other, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "# ax4.plot(list(range(x_axis_num)),v_sc_other, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# # ax4.plot(list(range(x_axis_num)),v_so_other, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "# ax4.legend(fontsize=font_size, loc=1)\n", + "# ax4.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax4.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))\n", + "# # ax4.set_title(\"Other Value Vector \", fontsize=font_size, fontweight=\"light\")\n", + "plt.show()\n", + "\n", + 
"print(\"=====================================================================================================================\")\n", + "\n", + "fig, ax2 = plt.subplots(1,1, figsize=(8, 5.5), dpi=70)\n", + "\n", + "x_axis_num = layer_num * head_num\n", + "\n", + "# ax1.plot(list(range(x_axis_num)),vw_t_cls, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),vw_s_cls, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),vw_sc_cls, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# # ax1.plot(list(range(x_axis_num)),vw_so_cls, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "# ax1.legend(fontsize=font_size, loc=1)\n", + "# ax1.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax1.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# # ax1.set_title(\"CLS LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "\n", + "ax2.plot(list(range(x_axis_num)),vw_t_sep, label=\"Ternary\", color='r', linewidth=line_w)\n", + "ax2.plot(list(range(x_axis_num)),vw_s_sep, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "ax2.plot(list(range(x_axis_num)),vw_sc_sep, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# ax2.plot(list(range(x_axis_num)),vw_so_sep, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "ax2.legend(fontsize=font_size, loc=1)\n", + "ax2.set_ylabel(\"MSE\", fontsize=font_size)\n", + "ax2.set_xlabel(\"Head\", fontsize=font_size)\n", + "ax2.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# ax2.set_title(\"SEP LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "ax2.tick_params(axis=\"x\", labelsize=font_size)\n", + "ax2.tick_params(axis=\"y\", labelsize=font_size)\n", + "\n", + "# ax3.plot(list(range(x_axis_num)),vw_t_punc, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax3.plot(list(range(x_axis_num)),vw_s_punc, label=\"SARQ-1step\", color='c', 
linewidth=line_w)\n", + "# ax3.plot(list(range(x_axis_num)),vw_sc_punc, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# # ax3.plot(list(range(x_axis_num)),vw_so_punc, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "# ax3.legend(fontsize=font_size, loc=1)\n", + "# ax3.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax3.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# #x3.set_title(\"PUNC LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "# ax4.plot(list(range(x_axis_num)),vw_t_other, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax4.plot(list(range(x_axis_num)),vw_s_other, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "# ax4.plot(list(range(x_axis_num)),vw_sc_other, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# #ax4.plot(list(range(x_axis_num)),vw_so_other, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "# ax4.legend(fontsize=font_size, loc=1)\n", + "# ax4.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax4.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# # ax4.set_title(\"Other LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "plt.show()\n", + "\n", + "fig, ax2 = plt.subplots(1,1, figsize=(8, 5.5), dpi=70)\n", + "\n", + "x_axis_num = layer_num * head_num\n", + "\n", + "# ax1.plot(list(range(x_axis_num)),vw_t_cls, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),vw_s_cls, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),vw_sc_cls, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# # ax1.plot(list(range(x_axis_num)),vw_so_cls, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "# ax1.legend(fontsize=font_size, loc=1)\n", + "# ax1.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax1.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# # ax1.set_title(\"CLS LN Output Vector \", 
fontsize=font_size, fontweight=\"light\")\n", + "\n", + "\n", + "ax2.plot(list(range(x_axis_num)),t_kld_list, label=\"Ternary\", color='r', linewidth=line_w)\n", + "ax2.plot(list(range(x_axis_num)),s_kld_list, label=\"SARQ-1step\", color='c', linewidth=line_w)\n", + "ax2.plot(list(range(x_axis_num)),sc_kld_list, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# ax2.plot(list(range(x_axis_num)),vw_so_sep, label=\"SARQ_O\", color='tab:orange', linewidth=line_w)\n", + "\n", + "ax2.legend(fontsize=font_size, loc=1)\n", + "ax2.set_ylabel(\"KL_Divergence\", fontsize=font_size)\n", + "ax2.set_xlabel(\"Head\", fontsize=font_size)\n", + "ax2.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# ax2.set_title(\"SEP LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "ax2.tick_params(axis=\"x\", labelsize=font_size)\n", + "ax2.tick_params(axis=\"y\", labelsize=font_size)\n", + "# fig, [ax1, ax2, ax3, ax4] = plt.subplots(4,1, figsize=(16, 24), dpi=70)\n", + "\n", + "# x_axis_num = layer_num * head_num\n", + "\n", + "# ax1.plot(list(range(x_axis_num)),vw_t_cls, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),vw_s_cls, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# ax1.plot(list(range(x_axis_num)),vw_sc_cls, label=\"SARQ_C\", color='c', linewidth=line_w)\n", + "\n", + "# ax1.legend(fontsize=font_size, loc=1)\n", + "# ax1.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax1.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# ax1.set_title(\"CLS LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "\n", + "# ax2.plot(list(range(x_axis_num)),vw_t_sep, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax2.plot(list(range(x_axis_num)),vw_s_sep, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# ax2.plot(list(range(x_axis_num)),vw_sc_sep, label=\"SARQ_C\", color='c', linewidth=line_w)\n", + "\n", + "# ax2.legend(fontsize=font_size, loc=1)\n", + "# 
ax2.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax2.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# ax2.set_title(\"SEP LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "# ax3.plot(list(range(x_axis_num)),vw_t_punc, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax3.plot(list(range(x_axis_num)),vw_s_punc, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# ax3.plot(list(range(x_axis_num)),vw_sc_punc, label=\"SARQ_C\", color='c', linewidth=line_w)\n", + "\n", + "# ax3.legend(fontsize=font_size, loc=1)\n", + "# ax3.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax3.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# ax3.set_title(\"PUNC LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "\n", + "# ax4.plot(list(range(x_axis_num)),vw_t_other, label=\"Ternary\", color='r', linewidth=line_w)\n", + "# ax4.plot(list(range(x_axis_num)),vw_s_other, label=\"SARQ\", color='b', linewidth=line_w)\n", + "# ax4.plot(list(range(x_axis_num)),vw_sc_other, label=\"SARQ_C\", color='c', linewidth=line_w)\n", + "\n", + "# ax4.legend(fontsize=font_size, loc=1)\n", + "# ax4.set_ylabel(\"MSE\", fontsize=font_size)\n", + "# ax4.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2f'))\n", + "# ax4.set_title(\"Other LN Output Vector \", fontsize=font_size, fontweight=\"light\")\n", + "# plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "170820ce", + "metadata": {}, + "outputs": [], + "source": [ + "fig, [ax1, ax2, ax3, ax4] = plt.subplots(4, 1, figsize=(12,16), dpi=80)\n", + "\n", + "x_axis_num = layer_num * head_num\n", + "\n", + "ax1.set_title(\"Period Token\")\n", + "ax1.plot(list(range(x_axis_num)),t_punc_list, label=\"Ternary_punc\", color='r', linewidth=1)\n", + "ax1.plot(list(range(x_axis_num)),SB_punc_list, label=\"SARQ_punc\", color='c', linewidth=1)\n", + "ax1.legend(fontsize=\"12\")\n", + "\n", + "ax2.set_title(\"CLS Token\")\n", + 
"ax2.plot(list(range(x_axis_num)),t_cls_list, label=\"Ternary_cls\", color='r', linewidth=1)\n", + "ax2.plot(list(range(x_axis_num)),SB_cls_list, label=\"SARQ_cls\", color='c', linewidth=1)\n", + "ax2.legend(fontsize=\"12\")\n", + "\n", + "ax3.set_title(\"SEP Token\")\n", + "ax3.plot(list(range(x_axis_num)),t_sep_list, label=\"Ternary_sep\", color='r', linewidth=1)\n", + "ax3.plot(list(range(x_axis_num)),SB_sep_list, label=\"SARQ_sep\", color='c', linewidth=1)\n", + "ax3.legend(fontsize=\"12\")\n", + "\n", + "ax4.set_title(\"Other Token\")\n", + "ax4.plot(list(range(x_axis_num)),t_other_list, label=\"Ternary_other\", color='r', linewidth=1)\n", + "ax4.plot(list(range(x_axis_num)),SB_other_list, label=\"SARQ_other\", color='c', linewidth=1)\n", + "ax4.legend(fontsize=\"12\")\n", + "\n", + "# fig, ax = plt.subplots(1, 1, figsize=(16,8))\n", + "# ax.plot(list(range(144)),t_kld_list, label=\"Ternary_other\", color='r', linewidth=1.5)\n", + "# ax.plot(list(range(144)),s_kld_list, label=\"SARQ_other\", color='c', linewidth=1.5)\n", + "# ax.legend(fontsize=\"20\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "288feea9", + "metadata": {}, + "outputs": [], + "source": [ + "loss_cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)\n", + "teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids_sliced.to(device))\n", + "student_logits, student_atts, student_reps, student_probs, student_values = student_model(input_ids_sliced.to(device), teacher_probs=teacher_probs)\n", + "\n", + "st_values = student_values\n", + "tc_values = teacher_values\n", + "\n", + "loss_cos(st_values[l][norm_type][0,2,sep_index[0],:], tc_values[l][norm_type][0,2,sep_index[0],:])\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "507b21a7", + "metadata": {}, + "outputs": [], + "source": [ + "a = st_values[l][norm_type][0,2,sep_index[0],:]\n", + "b = tc_values[l][norm_type][0,2,sep_index[0],:]" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "id": "a9aaef2b", + "metadata": {}, + "outputs": [], + "source": [ + "mse_func(st_values[0][0], tc_values[0][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f55a8270", + "metadata": {}, + "outputs": [], + "source": [ + "torch.matmul(a, b) / (norm_func(a) * norm_func(b))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7c481d4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/QAT_analysis_plot.ipynb b/notebooks/QAT_analysis_plot.ipynb new file mode 100644 index 0000000..c79fb4d --- /dev/null +++ b/notebooks/QAT_analysis_plot.ipynb @@ -0,0 +1,590 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9e258bcc", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import absolute_import, division, print_function\n", + "\n", + "import pprint\n", + "import argparse\n", + "import logging\n", + "import os\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"3\" # Set GPU Index to use\n", + "os.environ['CUDA_LAUNCH_BLOCKING'] = \"1\"\n", + "import random\n", + "import sys\n", + "import pickle\n", + "import copy\n", + "import collections\n", + "import math\n", + "\n", + "import numpy as np\n", + "import numpy\n", + "import torch\n", + "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler,TensorDataset\n", + "# from torch.utils.tensorboard import SummaryWriter\n", + "\n", + "from torch.nn import CrossEntropyLoss, MSELoss\n", + "from tqdm import tqdm\n", + 
"from transformer import BertForSequenceClassification,WEIGHTS_NAME, CONFIG_NAME\n", + "from transformer.modeling_quant import BertForSequenceClassification as QuantBertForSequenceClassification\n", + "from transformer import BertTokenizer\n", + "from transformer import BertAdam\n", + "from transformer import BertConfig\n", + "from transformer import QuantizeLinear, QuantizeAct, BertSelfAttention, FP_BertSelfAttention, ClipLinear\n", + "from utils_glue import *\n", + "from bertviz import model_view\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import torch.nn.functional as F\n", + " \n", + "def get_tensor_data(output_mode, features):\n", + " if output_mode == \"classification\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.long)\n", + " elif output_mode == \"regression\":\n", + " all_label_ids = torch.tensor([f.label_id for f in features], dtype=torch.float)\n", + "\n", + "\n", + " all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long)\n", + " all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)\n", + " all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)\n", + " all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)\n", + " tensor_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,all_label_ids, all_seq_lengths)\n", + " return tensor_data, all_label_ids\n", + "\n", + "def do_eval(model, task_name, eval_dataloader,\n", + " device, output_mode, eval_labels, num_labels, teacher_model=None):\n", + " eval_loss = 0\n", + " nb_eval_steps = 0\n", + " preds = []\n", + "\n", + " for batch_ in tqdm(eval_dataloader, desc=\"Inference\"):\n", + " batch_ = tuple(t.to(device) for t in batch_)\n", + " \n", + " with torch.no_grad():\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch_\n", + "\n", + " # teacher attnmap test\n", 
+ " if teacher_model is not None: \n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_values = teacher_model(input_ids, segment_ids, input_mask)\n", + " logits, loss, cls_loss, rep_loss, output_loss, attmap_loss, attscore_loss, coeff_list, _ = model(input_ids, segment_ids, input_mask, teacher_outputs=(teacher_probs, teacher_values, teacher_reps, teacher_logits, teacher_atts), output_mode=output_mode, seq_lengths=seq_lengths)\n", + " else:\n", + " outputs = model(input_ids, segment_ids, input_mask)\n", + " logits = outputs[0]\n", + " \n", + " # create eval loss and other metric required by the task\n", + " if output_mode == \"classification\":\n", + " loss_fct = CrossEntropyLoss()\n", + " tmp_eval_loss = loss_fct(logits.view(-1, num_labels), label_ids.view(-1))\n", + " elif output_mode == \"regression\":\n", + " loss_fct = MSELoss()\n", + " tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))\n", + "\n", + " eval_loss += tmp_eval_loss.mean().item()\n", + " nb_eval_steps += 1\n", + " if len(preds) == 0:\n", + " preds.append(logits.detach().cpu().numpy())\n", + " else:\n", + " preds[0] = np.append(\n", + " preds[0], logits.detach().cpu().numpy(), axis=0)\n", + "\n", + " eval_loss = eval_loss / nb_eval_steps\n", + "\n", + " preds = preds[0]\n", + " if output_mode == \"classification\":\n", + " preds = np.argmax(preds, axis=1)\n", + " elif output_mode == \"regression\":\n", + " preds = np.squeeze(preds)\n", + " result = compute_metrics(task_name, preds, eval_labels.numpy())\n", + " result['eval_loss'] = eval_loss\n", + " return result\n", + "\n", + "def soft_cross_entropy(predicts, targets):\n", + " student_likelihood = torch.nn.functional.log_softmax(predicts, dim=-1)\n", + " targets_prob = torch.nn.functional.softmax(targets, dim=-1)\n", + " return torch.sum((- targets_prob * student_likelihood), dim=-1).mean()\n", + "\n", + "processors = {\n", + " \"cola\": ColaProcessor,\n", + " \"mnli\": MnliProcessor,\n", + " \"mnli-mm\": 
MnliMismatchedProcessor,\n", + " \"mrpc\": MrpcProcessor,\n", + " \"sst-2\": Sst2Processor,\n", + " \"sts-b\": StsbProcessor,\n", + " \"qqp\": QqpProcessor,\n", + " \"qnli\": QnliProcessor,\n", + " \"rte\": RteProcessor \n", + "}\n", + "\n", + "output_modes = {\n", + " \"cola\": \"classification\",\n", + " \"mnli\": \"classification\",\n", + " \"mrpc\": \"classification\",\n", + " \"sst-2\": \"classification\",\n", + " \"sts-b\": \"regression\",\n", + " \"qqp\": \"classification\",\n", + " \"qnli\": \"classification\",\n", + " \"rte\": \"classification\"\n", + "}\n", + "\n", + "default_params = {\n", + " \"cola\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\": 50}, # No Aug : 50 Aug : 400\n", + " \"mnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":8000},\n", + " \"mrpc\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"sst-2\": {\"max_seq_length\": 64,\"batch_size\":1,\"eval_step\":100},\n", + " \"sts-b\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":100},\n", + " \"qqp\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"qnli\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\":1000},\n", + " \"rte\": {\"max_seq_length\": 128,\"batch_size\":1,\"eval_step\": 20}\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "535b6819", + "metadata": {}, + "source": [ + "## Task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63ce4c7b", + "metadata": {}, + "outputs": [], + "source": [ + "task_name = \"cola\"\n", + "bert_size = \"base\"\n", + "\n", + "if bert_size == \"large\":\n", + " layer_num = 24\n", + " head_num = 16\n", + "else: \n", + " layer_num = 12\n", + " head_num = 12" + ] + }, + { + "cell_type": "markdown", + "id": "5d8dfd91", + "metadata": {}, + "source": [ + "## Dataset Input Setting" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10a16f27", + "metadata": {}, + "outputs": [], + "source": [ + "device = 
torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n", + "\n", + "# Processor & Task Info\n", + "processor = processors[task_name]()\n", + "output_mode = output_modes[task_name]\n", + "label_list = processor.get_labels()\n", + "num_labels = len(label_list)\n", + "\n", + "if task_name in default_params:\n", + " batch_size = default_params[task_name][\"batch_size\"]\n", + " max_seq_length = default_params[task_name][\"max_seq_length\"]\n", + " eval_step = default_params[task_name][\"eval_step\"]\n", + " \n", + "# Tokenizer\n", + "tokenizer = BertTokenizer.from_pretrained(teacher_model_dir, do_lower_case=True)\n", + "\n", + "\n", + "# Load Dataset\n", + "data_dir = os.path.join(\"data\",task_name)\n", + "processed_data_dir = os.path.join(data_dir,'preprocessed')\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)\n", + "eval_features = convert_examples_to_features(eval_examples, label_list, max_seq_length, tokenizer, output_mode)\n", + "# dev_file = train_file = os.path.join(processed_data_dir,'dev.pkl') \n", + "# eval_features = pickle.load(open(dev_file,'rb'))\n", + "\n", + "eval_data, eval_labels = get_tensor_data(\"classification\", eval_features)\n", + "eval_sampler = SequentialSampler(eval_data)\n", + "eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=1)\n", + "eval_data, eval_labels = get_tensor_data(output_mode, eval_features)\n", + "\n", + "eval_examples = processor.get_dev_examples(data_dir)\n", + "\n", + "# Sampling Sentence \n", + "i = 0 \n", + "# num = 3\n", + "num = 43\n", + "\n", + "for step, batch in enumerate(eval_dataloader):\n", + " # model.train()\n", + " \n", + " batch = tuple(t.to(device) for t in 
batch)\n", + " input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch\n", + " i = i + 1\n", + " if i == num:\n", + " break\n", + "\n", + "seq_length = seq_lengths.item()\n", + "\n", + "input_ids_sliced = input_ids[:,:seq_length]\n", + "input_id = []\n", + "for i in input_ids_sliced[0]:\n", + " input_id.append(i.item())\n", + "tokens = tokenizer.convert_ids_to_tokens(input_id)\n", + "\n", + "\n", + "\n", + "sample_sentence_a = str()\n", + "sample_sentence_b = str()\n", + "index = 0\n", + "\n", + "for i, word in enumerate(tokens[1:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_a += word\n", + " sample_sentence_a += \" \"\n", + "index = i\n", + "\n", + "for i, word in enumerate(tokens[index+2:-1]):\n", + " if word == \"[SEP]\":\n", + " break\n", + " sample_sentence_b += word\n", + " sample_sentence_b += \" \"\n", + "\n", + "sep_index = torch.where(input_ids[0] == 102)[0]\n", + "\n", + "if len(sample_sentence_b) > 1:\n", + " sample_sentence_b_start = segment_ids[0].tolist().index(1)\n", + "else:\n", + " sample_sentence_b_start = None\n", + "\n", + "print(f\"input_ids : {input_ids_sliced}\")\n", + "print(f\"tokens : {tokens}\")\n", + "print(f\"A : {sample_sentence_a}\")\n", + "print(f\"B : {sample_sentence_b}\")\n", + "print(sep_index)\n", + "\n", + "for i, token in enumerate(tokens):\n", + " tokens[i] = token\n" + ] + }, + { + "cell_type": "markdown", + "id": "023c3648", + "metadata": {}, + "source": [ + "## Model Load" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "35d1b723", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "05/21 04:45:56 PM Loading model models/cola/pytorch_model.bin\n", + "05/21 04:45:56 PM loading model...\n", + "05/21 04:45:56 PM done!\n", + "05/21 04:45:56 PM Weights from pretrained model not used in BertForSequenceClassification: ['bert.embeddings.position_ids']\n", + "05/21 04:45:56 PM loading configuration file 
output/cola/exploration/1SB_S/config.json\n", + "05/21 04:45:58 PM Loading model models/cola/pytorch_model.bin\n", + "05/21 04:45:59 PM loading model...\n", + "05/21 04:45:59 PM done!\n", + "05/21 04:45:59 PM Weights from pretrained model not used in BertForSequenceClassification: ['bert.embeddings.position_ids']\n", + "\n", + "==> Load Model DONE\n", + "==> Test Inference\n" + ] + } + ], + "source": [ + "st_model = \"qat\" # QAT, Q\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "model_dir = \"models\"\n", + "output_dir = \"output\"\n", + "\n", + "if bert_size == \"large\":\n", + " model_dir = os.path.join(model_dir, \"BERT_large\")\n", + " output_dir = os.path.join(output_dir, \"BERT_large\")\n", + "\n", + "# Teacher Model Load\n", + "student_model_dir = os.path.join(model_dir,task_name)\n", + "teacher_model_dir = os.path.join(model_dir,task_name)\n", + "\n", + "if teacher_model is None:\n", + " teacher_model = BertForSequenceClassification.from_pretrained(teacher_model_dir, num_labels=num_labels)\n", + " teacher_model.to(device)\n", + " teacher_model.eval()\n", + " # Inference\n", + " teacher_logits, teacher_atts, teacher_reps, teacher_probs, teacher_zip = teacher_model(input_ids_sliced.to(device)) #input_mask, segment_ids\n", + "\n", + "# QAT Result Model\n", + "if st_model == \"qat\":\n", + " sarq_model_name = \"1SB_O\"\n", + " sarq_model_dir = os.path.join(output_dir, task_name, \"exploration\", sarq_model_name) \n", + " quant_config = BertConfig.from_pretrained(sarq_model_dir) \n", + " sarq_model = QuantBertForSequenceClassification.from_pretrained(sarq_model_dir, config = quant_config, num_labels=num_labels)\n", + "\n", + " sarq_model.to(device)\n", + " sarq_model.eval()\n", + " # Inference\n", + " logits, qat_atts, qat_reps, qat_probs, qat_zip = sarq_model(input_ids_sliced.to(device), teacher_outputs=None, output_mode=output_mode, seq_lengths=seq_lengths)\n", + "\n", + "\n", + "# Direct Quantization 
Model (Q)\n", + "if st_model == \"q\":\n", + " sarq_model_name = \"1SB_O\"\n", + " sarq_model_dir = os.path.join(output_dir, task_name, \"exploration\", sarq_model_name) \n", + " quant_config = BertConfig.from_pretrained(sarq_model_dir) \n", + " sarq_model = QuantBertForSequenceClassification.from_pretrained(teacher_model_dir, config = quant_config, num_labels=num_labels)\n", + "\n", + " sarq_model.to(device)\n", + " sarq_model.eval()\n", + " # Inference\n", + " logits, q_atts, q_reps, q_probs, q_zip = sarq_model(input_ids_sliced.to(device), teacher_outputs=None, output_mode=output_mode, seq_lengths=seq_lengths)\n", + "\n", + "print()\n", + "print(\"==> Load Model DONE\")\n", + "print(\"==> Test Inference\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "0c3e638d", + "metadata": {}, + "source": [ + "## Attention Prob AVG" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "ad8246de", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + } + ], + "source": [ + "q_avg_attention = []\n", + "qat_avg_attention = []\n", + "tc_avg_attention = []\n", + "\n", + "for l in range(12):\n", + " tc_avg_attention.append(teacher_probs[l][0,:,:,sep_index[0]].mean().item())\n", + " # print(teacher_probs[l][0,:,:,sep_index[0]].mean())\n", + "print()\n", + "for l in range(12):\n", + " qat_avg_attention.append(student_zip[1][l][0,:,:,sep_index[0]].mean().item())\n", + " # print(student_zip[1][l][0,:,:,sep_index[0]].mean())\n", + "print()\n", + "for l in range(12):\n", + " q_avg_attention.append(q_zip[1][l][0,:,:,sep_index[0]].mean().item())\n", + " # print(q_zip[1][l][0,:,:,sep_index[0]].mean())\n" + ] + }, + { + "cell_type": "markdown", + "id": "8940138e", + "metadata": {}, + "source": [ + "## Attention Output MSE Loss" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f452e02f", + "metadata": {}, + "outputs": [], + "source": [ + "mse_func = MSELoss()\n", + "\n", + "\n", + 
"q_attn_output_list = []\n", + "qat_attn_output_list = []\n", + "tc_attn_output = []\n", + "\n", + "for l in range(12):\n", + " tc_attn_context, tc_attn_output, tc_value_vector, tc_sa_output = teacher_zip[l]\n", + " st_attn_context, st_attn_output, st_value_vector, st_sa_output = student_zip[0][l] \n", + " q_attn_context, q_attn_output, q_value_vector, q_sa_output = q_zip[0][l] \n", + " \n", + " # # print(mse_func(tc_attn_context,st_attn_context).item())\n", + " qat_attn_output_list.append(mse_func(tc_attn_context,st_attn_context).item())\n", + " q_attn_output_list.append(mse_func(tc_attn_context,q_attn_context).item())\n", + " \n", + " # print(mse_func(tc_attn_context,st_attn_context))\n", + " # print(mse_func(tc_attn_context,q_attn_context))\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "id": "211e2acf", + "metadata": {}, + "source": [ + "## Save Torch File" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "195b5e9f", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(tc_avg_attention, \"tc_sep_prob_avg.pt\")\n", + "torch.save(q_avg_attention, \"q_sep_prob_avg.pt\")\n", + "torch.save(qat_avg_attention, \"qat_sep_prob_avg.pt\")\n", + "\n", + "torch.save(q_attn_output_list, \"q_attn_output_mse.pt\")\n", + "torch.save(qat_attn_output_list, \"qat_attn_output_mse.pt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "880adbac", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "tc_avg_attention = torch.load(\"tc_sep_prob_avg.pt\")\n", + "q_avg_attention = torch.load(\"q_sep_prob_avg.pt\")\n", + "qat_avg_attention = torch.load(\"qat_sep_prob_avg.pt\")\n", + "\n", + "q_attn_output_list = torch.load(\"q_attn_output_mse.pt\")\n", + "qat_attn_output_list = torch.load(\"qat_attn_output_mse.pt\")\n", + "\n", + "\n", + "plt.rcParams['axes.linewidth'] = 1\n", + "plt.rcParams['patch.linewidth'] = 1\n", + "fig, ax = plt.subplots(1, 1, figsize=(8, 5), dpi=70)\n", + "lw = 3.5\n", + "fs = 20\n", + "tc_name = \"FP\"\n", + "qat_name = \"QAT\"\n", + "q_name = \"Q\"\n", + "\n", + "tc_c = \"r\"\n", + "qat_c = \"orange\"\n", + "q_c = \"dodgerblue\"\n", + "al=0.7\n", + "plt.xlabel(\"Layer\", fontsize=fs)\n", + "ax.plot(range(12), tc_avg_attention, linewidth=lw, color=tc_c, label=tc_name, alpha=al, marker=\"o\")\n", + "ax.plot(range(12), qat_avg_attention, linewidth=lw, color=qat_c, label=qat_name, alpha=al, marker=\"o\")\n", + "ax.plot(range(12), q_avg_attention, linewidth=lw, color=q_c, label=q_name, alpha=al, marker=\"o\")\n", + "ax.tick_params(axis=\"x\", labelsize=fs)\n", + "ax.tick_params(axis=\"y\", labelsize=fs)\n", + "ax.set_ylabel(\"Avg Attention (SEP)\", fontsize=fs)\n", + "ax.set_ylim(0, 0.8)\n", + "ax.legend(loc=2, ncol=1, fontsize=fs)\n", + "# ax2.plot(range(12), tc_attn_output, linewidth=lw, color=tc_c, label=tc_name, alpha=0.4)\n", + "\n", + "fig, ax = plt.subplots(1, 1, figsize=(8, 5), dpi=70)\n", + "plt.xlabel(\"Layer\", fontsize=fs)\n", + "ax.plot(range(12), qat_attn_output_list, linewidth=lw, color=qat_c, label=\"QAT_MSE_loss\", alpha=al, marker=\"o\")\n", + "ax.plot(range(12), q_attn_output_list, linewidth=lw, color=q_c, label=\"Q_MSE_loss\", alpha=al, marker=\"o\")\n", + "ax.set_ylabel(\"Attention Output MSE\", fontsize=fs)\n", + "ax.legend(loc=2, ncol=1, fontsize=fs)\n", + "ax.tick_params(axis=\"x\", labelsize=fs)\n", + 
"ax.tick_params(axis=\"y\", labelsize=fs)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5df230f", + "metadata": {}, + "outputs": [], + "source": [ + "!rm tc_sep_prob_avg.pt\n", + "!rm q_sep_prob_avg.pt\n", + "!rm qat_sep_prob_avg.pt\n", + "\n", + "!rm q_attn_output_mse.pt\n", + "!rm qat_attn_output_mse.pt" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Result_list.ipynb b/notebooks/Result_list.ipynb new file mode 100644 index 0000000..e63c4fb --- /dev/null +++ b/notebooks/Result_list.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1236c376", + "metadata": {}, + "source": [ + "## A6000-2" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0a482c6f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "87.56231306081754\n", + "88.04265853498589\n", + "87.47464503042596\n", + "87.09374160623153\n", + "86.41176470588235\n", + "87.03814720563767\n", + "86.94946252612719\n", + "87.29294891198332\n", + "86.41176470588235\n", + "88.09242608960683\n" + ] + } + ], + "source": [ + "import os\n", + "task = \"mrpc\"\n", + "\n", + "bert = \"BERT_large\"\n", + "size = \"base\"\n", + "\n", + "if size == \"base\":\n", + " output_dir = os.path.join(\"output\", task, \"exploration\")\n", + "else:\n", + " output_dir = os.path.join(\"output\", bert, task, \"exploration\")\n", + " \n", + "seed_list = [1,2,3,4,5,6,7,8,9,10]\n", + "\n", + "for seed in seed_list:\n", + "\n", + " folder_name = f\"sweep_{size}_G_AC_R_{seed}\"\n", + " temp_dir = 
os.path.join(output_dir, folder_name, \"best_info.txt\")\n", + "\n", + " f = open(temp_dir, 'r')\n", + " print(f.readline())" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "22354238", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "85.58823529411765\n", + "85.33263305322129\n", + "86.16504854368931\n", + "85.87909301651695\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'output/BERT_Tiny_6l/mrpc/exploration/1SB_tiny-6l_S_O_5/best_info.txt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [27]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 12\u001b[0m folder_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1SB_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msize\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_S_O_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mseed\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 13\u001b[0m temp_dir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(output_dir, folder_name, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbest_info.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 15\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtemp_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(f\u001b[38;5;241m.\u001b[39mreadline())\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 
# Launches main.py with the Teacher-Intervention (TI) "step 1" settings below.
#
# Usage: bash run_TI_step_1.sh {GPU Num} {GLUE Task}   # e.g. bash run_TI_step_1.sh 0 sts-b
#
# Run from the repository root: main.py and --data_dir are both resolved
# relative to the current working directory.

if [ "$#" -lt 2 ]; then
    echo "Usage: bash $0 {GPU Num} {GLUE Task}" >&2
    exit 1
fi

# Quantization On/Off
quantize=1

# Quantization Range
q_qkv=1
q_ffn_1=1
q_ffn_2=1
q_emb=1
q_cls=1
# NOTE(review): layer_num is defined here but never forwarded to main.py below.
layer_num=-1

# KD & Ternary Option
mean_scale=0.7
bert=base

#===========================================================#
# Logging Option
exp_name=TI_step1
neptune=0
save_quantized_model=1

# Distill Option
pred_distill=1
rep_distill=1
attn_distill=1
output_distill=1

# Teacher Intervention (TI)
teacher_attnmap=0
teacher_context=0
teacher_output=1
# TI-G options
teacher_gradual=0
teacher_stochastic=0
teacher_inverted=0

# Training Type (downstream, qat_normal, qat_step1, qat_step2)
# NOTE(review): this is the step-1 script but selects qat_normal rather than
# qat_step1 -- confirm this is intended.
training_type=qat_normal

# DA Options
aug_train=0
aug_N=5

learning_rate=2E-5
# ===========================================================#

# main.py is referenced relative to the working directory (as run_TI_step_2.sh
# does) instead of through a machine-specific absolute path.
CUDA_VISIBLE_DEVICES="$1" python main.py --data_dir data --task_name "$2" --bert ${bert} \
--gpu 1 --quantize ${quantize} --qkv ${q_qkv} --ffn_1 ${q_ffn_1} --ffn_2 ${q_ffn_2} --emb ${q_emb} --cls ${q_cls} \
--aug_train ${aug_train} \
--output_distill ${output_distill} --pred_distill ${pred_distill} --rep_distill ${rep_distill} --attn_distill ${attn_distill} \
--teacher_attnmap ${teacher_attnmap} --teacher_context ${teacher_context} --teacher_output ${teacher_output} --teacher_gradual ${teacher_gradual} --teacher_stochastic ${teacher_stochastic} --teacher_inverted ${teacher_inverted} \
--training_type ${training_type} \
--mean_scale ${mean_scale} \
--exp_name ${exp_name} \
--save_quantized_model ${save_quantized_model} \
--neptune ${neptune} \
--aug_N ${aug_N} \
--num_train_epochs 3 --seed 1 \
--learning_rate ${learning_rate}
class BertConfig(object):
    r"""
        :class:`~transformers.BertConfig` is the configuration class to store the configuration of a
        `BertModel`.


        Arguments:
            vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`,
                or the path (str) of a JSON config file whose keys become attributes.
            hidden_size: Size of the encoder layers and the pooler layer.
            num_hidden_layers: Number of hidden layers in the Transformer encoder.
            num_attention_heads: Number of attention heads for each attention layer in
                the Transformer encoder.
            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
                layer in the Transformer encoder.
            hidden_act: The non-linear activation function (function or string) in the
                encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported.
            hidden_dropout_prob: The dropout probability for all fully connected
                layers in the embeddings, encoder, and pooler.
            attention_probs_dropout_prob: The dropout ratio for the attention
                probabilities.
            max_position_embeddings: The maximum sequence length that this model might
                ever be used with. Typically set this to something large just in case
                (e.g., 512 or 1024 or 2048).
            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
                `BertModel`.
            initializer_range: The stddev of the truncated_normal_initializer for
                initializing all weight matrices.
            layer_norm_eps: The epsilon used by LayerNorm.
            **kwargs: Any additional options; each one is stored as an attribute
                of the same name (applied last, so it overrides a loaded value).
    """

    def __init__(self,
                 vocab_size_or_config_json_file=30522,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=2,
                 initializer_range=0.02,
                 layer_norm_eps=1e-12,
                 **kwargs):
        """Build a config from explicit hyper-parameters or from a JSON file.

        Raises:
            ValueError: if the first argument is neither an int nor a str path.
        """
        # `object.__init__` takes no keyword arguments; forwarding **kwargs to
        # it raised TypeError for every extra option, so they are handled below.
        super(BertConfig, self).__init__()
        if isinstance(vocab_size_or_config_json_file, str) or (
                sys.version_info[0] == 2
                and isinstance(vocab_size_or_config_json_file, unicode)):  # noqa: F821 (Python 2 only)
            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
                json_config = json.loads(reader.read())
            # Every key of the JSON file becomes an attribute; the explicit
            # hyper-parameter arguments are ignored in this branch.
            for key, value in json_config.items():
                self.__dict__[key] = value
        elif isinstance(vocab_size_or_config_json_file, int):
            self.vocab_size = vocab_size_or_config_json_file
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.hidden_act = hidden_act
            self.intermediate_size = intermediate_size
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.initializer_range = initializer_range
            self.layer_norm_eps = layer_norm_eps
        else:
            raise ValueError("First argument must be either a vocabulary size (int)"
                             " or the path to a pretrained model config file (str)")

        # Same override semantics as `from_pretrained`: extras become attributes.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def save_pretrained(self, save_directory):
        """ Save a configuration object to the directory `save_directory`, so that it
            can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method.

            Raises:
                AssertionError: if `save_directory` is not an existing directory.
        """
        # Raised explicitly (instead of `assert`) so the check survives
        # `python -O`; the exception type is unchanged for existing callers.
        if not os.path.isdir(save_directory):
            raise AssertionError(
                "Saving path should be a directory where the model and configuration can be saved")

        # If we save using the predefined names, we can load using `from_pretrained`
        output_config_file = os.path.join(save_directory, CONFIG_NAME)

        self.to_json_file(output_config_file)
        logger.info("Configuration saved in {}".format(output_config_file))

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """Load `CONFIG_NAME` from a model directory and apply `kwargs` as attribute overrides."""
        config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
        config = cls.from_json_file(config_file)

        # Update config with kwargs if needed
        for key, value in kwargs.items():
            setattr(config, key, value)

        return config

    @classmethod
    def from_dict(cls, json_object):
        """Constructs a `Config` from a Python dictionary of parameters."""
        # -1 is only a placeholder vocabulary size; every key of `json_object`
        # is then written over the defaults.
        config = cls(vocab_size_or_config_json_file=-1)
        for key, value in json_object.items():
            setattr(config, key, value)
        return config

    @classmethod
    def from_json_file(cls, json_file):
        """Constructs a `BertConfig` from a json file of parameters."""
        with open(json_file, "r", encoding='utf-8') as reader:
            text = reader.read()
        return cls.from_dict(json.loads(text))

    def __eq__(self, other):
        """Two configs are equal when their attribute dictionaries are equal."""
        other_state = getattr(other, "__dict__", None)
        if other_state is None:
            # e.g. `config == 1`: let Python fall back to its default
            # comparison instead of raising AttributeError.
            return NotImplemented
        return self.__dict__ == other_state

    def __repr__(self):
        return str(self.to_json_string())

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"

    def to_json_file(self, json_file_path):
        """ Save this instance to a json file."""
        with open(json_file_path, "w", encoding='utf-8') as writer:
            writer.write(self.to_json_string())
+ Raise ``EnvironmentError`` if `filename` or its stored metadata do not exist. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if sys.version_info[0] == 3 and isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + cache_path = os.path.join(cache_dir, filename) + if not os.path.exists(cache_path): + raise EnvironmentError("file {} not found".format(cache_path)) + + meta_path = cache_path + '.json' + if not os.path.exists(meta_path): + raise EnvironmentError("file {} not found".format(meta_path)) + + with open(meta_path, encoding="utf-8") as meta_file: + metadata = json.load(meta_file) + url = metadata['url'] + etag = metadata['etag'] + + return url, etag + + +def cached_path(url_or_filename, cache_dir=None): + """ + Given something that might be a URL (or might be a local path), + determine which. If it's a URL, download the file and cache it, and + return the path to the cached file. If it's already a local path, + make sure the file exists and then return the path. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if sys.version_info[0] == 3 and isinstance(url_or_filename, Path): + url_or_filename = str(url_or_filename) + if sys.version_info[0] == 3 and isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + parsed = urlparse(url_or_filename) + + if parsed.scheme in ('http', 'https', 's3'): + # URL, so get it from the cache (downloading if necessary) + return get_from_cache(url_or_filename, cache_dir) + elif os.path.exists(url_or_filename): + # File, and it exists. + return url_or_filename + elif parsed.scheme == '': + # File, but it doesn't exist. 
def s3_request(func):
    """
    Wrapper function for s3 requests in order to create more helpful error
    messages.

    The wrapped callable must take the url as its first positional argument.
    A ``ClientError`` whose error code means "object does not exist" is
    converted into ``EnvironmentError("file <url> not found")``; every other
    ``ClientError`` is re-raised unchanged.
    """
    # Error codes treated as "not found". S3 reports a bare "404" for HEAD-style
    # lookups and the symbolic "NoSuchKey" for GET-style lookups.
    not_found_codes = ("404", "NoSuchKey")

    @wraps(func)
    def wrapper(url, *args, **kwargs):
        try:
            return func(url, *args, **kwargs)
        except ClientError as exc:
            # Error codes are often symbolic strings ("AccessDenied", ...).
            # Compare as text: calling int() on them raised ValueError and hid
            # the original ClientError from the caller.
            code = exc.response.get("Error", {}).get("Code")
            if str(code) in not_found_codes:
                raise EnvironmentError("file {} not found".format(url))
            raise

    return wrapper
Then return the path to the cached file. + """ + if cache_dir is None: + cache_dir = PYTORCH_PRETRAINED_BERT_CACHE + if sys.version_info[0] == 3 and isinstance(cache_dir, Path): + cache_dir = str(cache_dir) + + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + + # Get eTag to add to filename, if it exists. + if url.startswith("s3://"): + etag = s3_etag(url) + else: + try: + response = requests.head(url, allow_redirects=True) + if response.status_code != 200: + etag = None + else: + etag = response.headers.get("ETag") + except EnvironmentError: + etag = None + + if sys.version_info[0] == 2 and etag is not None: + etag = etag.decode('utf-8') + filename = url_to_filename(url, etag) + + # get cache path to put the file + cache_path = os.path.join(cache_dir, filename) + + # If we don't have a connection (etag is None) and can't identify the file + # try to get the last downloaded one + if not os.path.exists(cache_path) and etag is None: + matching_files = fnmatch.filter(os.listdir(cache_dir), filename + '.*') + matching_files = list(filter(lambda s: not s.endswith('.json'), matching_files)) + if matching_files: + cache_path = os.path.join(cache_dir, matching_files[-1]) + + if not os.path.exists(cache_path): + # Download to temporary file, then copy to cache dir once finished. + # Otherwise you get corrupt cache entries if the download gets interrupted. 
def get_file_extension(path, dot=True, lower=True):
    """
    Return the extension of `path` as reported by ``os.path.splitext``.

    Args:
        path: file name or path to inspect.
        dot: keep the leading period when True, drop the first character of
            the extension otherwise.
        lower: lower-case the result when True.

    Returns:
        The extension string, or an empty string when `path` has none.
    """
    _, suffix = os.path.splitext(path)
    if not dot:
        suffix = suffix[1:]
    if lower:
        suffix = suffix.lower()
    return suffix
def gelu(x):
    """Apply the erf-based GELU activation element-wise.

    Computes ``x * 0.5 * (1 + erf(x / sqrt(2)))``.
    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
    Also see https://arxiv.org/abs/1606.08415
    """
    half_x = x * 0.5
    gate = 1.0 + torch.erf(x / math.sqrt(2.0))
    return half_x * gate
class BertSelfOutput(nn.Module):
    """Output stage of the self-attention sub-layer.

    Applies a dense projection, dropout, a residual addition with the
    sub-layer input and LayerNorm. The layer index `i` is accepted so the
    constructor matches the other per-layer modules; it is not used here.
    """

    def __init__(self, config, i):
        super(BertSelfOutput, self).__init__()
        width = config.hidden_size
        head_count = config.num_attention_heads

        # Sub-modules are created in the same order as before so parameter
        # initialization and state_dict key order stay unchanged.
        self.dense = nn.Linear(width, width)
        self.LayerNorm = nn.LayerNorm(width, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.num_attention_heads = head_count
        self.attention_head_size = int(width / head_count)

    def forward(self, hidden_states, input_tensor):
        """Return ``(normalized_output, raw_projection)``.

        `raw_projection` is the dense output before dropout, the residual
        addition and LayerNorm.
        """
        projected = self.dense(hidden_states)
        normalized = self.LayerNorm(self.dropout(projected) + input_tensor)
        return normalized, projected
class BertPreTrainedModel(nn.Module):
    """ An abstract class to handle weights initialization and
        a simple interface for loading pretrained models.
    """

    def __init__(self, config, *inputs, **kwargs):
        super(BertPreTrainedModel, self).__init__()
        self.config = config

    def init_bert_weights(self, module):
        """ Initialize the weights of one sub-module (meant for ``self.apply``).

        Linear and Embedding weights are drawn from a normal distribution with
        std ``config.initializer_range``; LayerNorm is reset to weight 1 and
        bias 0; Linear biases are zeroed.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(
                mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        """
        Instantiate a BertPreTrainedModel from a pre-trained model directory or a pytorch state dict.

        Params:
            pretrained_model_name_or_path: directory containing
                . the configuration file named by ``CONFIG_NAME`` (only read
                  when no `config` is passed)
                . the weights file named by ``WEIGHTS_NAME`` (only read when
                  no `state_dict` is passed)
            state_dict: an optional state dictionary (collections.OrderedDict object)
                to use instead of the weights file. It is not modified.
            config: optional BertConfig instance to use instead of the config file
            *inputs, **kwargs: additional input for the specific Bert class
                (ex: num_labels for BertForSequenceClassification)

        Returns:
            The instantiated model with the weights loaded. Missing or unused
            keys are logged at INFO level and loading errors (e.g. shape
            mismatches) are logged as warnings; none of them abort loading.
        """
        state_dict = kwargs.pop('state_dict', None)
        config = kwargs.pop('config', None)

        if config is None:
            # Load config
            config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
            config = BertConfig.from_json_file(config_file)

        # Instantiate model.
        model = cls(config, *inputs, **kwargs)
        if state_dict is None:
            weights_path = os.path.join(
                pretrained_model_name_or_path, WEIGHTS_NAME)
            state_dict = torch.load(weights_path, map_location='cpu')

        # Work on a shallow copy from here on: the key renaming below and
        # _load_from_state_dict may both modify the mapping, and a dict passed
        # in by the caller must be left untouched.
        metadata = getattr(state_dict, '_metadata', None)
        state_dict = state_dict.copy()
        if metadata is not None:
            state_dict._metadata = metadata

        # Map legacy parameter names (gamma/beta) onto weight/bias.
        old_keys = []
        new_keys = []
        for key in state_dict.keys():
            new_key = None
            if 'gamma' in key:
                new_key = key.replace('gamma', 'weight')
            if 'beta' in key:
                new_key = key.replace('beta', 'bias')
            if new_key:
                old_keys.append(key)
                new_keys.append(new_key)
        for old_key, new_key in zip(old_keys, new_keys):
            state_dict[new_key] = state_dict.pop(old_key)

        missing_keys = []
        unexpected_keys = []
        error_msgs = []

        def load(module, prefix=''):
            # Recursively load each sub-module from the keys under its prefix.
            local_metadata = {} if metadata is None else metadata.get(
                prefix[:-1], {})
            module._load_from_state_dict(
                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        # A bare encoder (no `bert` attribute) can still consume a checkpoint
        # whose keys carry the `bert.` prefix.
        start_prefix = ''
        if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()):
            start_prefix = 'bert.'

        load(model, prefix=start_prefix)

        # Report what the loader collected instead of dropping it silently.
        # Nothing is raised, so runs that load a partially matching checkpoint
        # keep working as before.
        model_name = model.__class__.__name__
        if missing_keys:
            logger.info("Weights of %s not initialized from pretrained model: %s",
                        model_name, missing_keys)
        if unexpected_keys:
            logger.info("Weights from pretrained model not used in %s: %s",
                        model_name, unexpected_keys)
        if error_msgs:
            logger.warning("Errors while loading state_dict for %s:\n\t%s",
                           model_name, "\n\t".join(error_msgs))

        return model
loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) + return loss, attention_scores, encoded_layers + else: + return logits, attention_scores, encoded_layers, attention_probs, attention_values diff --git a/transformer/modeling_quant.py b/transformer/modeling_quant.py new file mode 100644 index 0000000..30b8eea --- /dev/null +++ b/transformer/modeling_quant.py @@ -0,0 +1,482 @@ +# coding=utf-8 +# 2020.04.20 - Add&replace quantization modules +# Huawei Technologies Co., Ltd +# Copyright (c) 2020, Huawei Technologies Co., Ltd. All rights reserved. +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def soft_cross_entropy(predicts, targets):
    """Cross entropy between two sets of logits, with soft targets.

    `targets` is turned into a distribution with softmax over the last
    dimension and `predicts` into log-probabilities with log-softmax over the
    last dimension. The per-row cross entropy ``-sum(p * log q)`` is averaged
    over all remaining dimensions.

    Returns:
        A zero-dimensional tensor holding the mean cross entropy.
    """
    log_q = torch.nn.functional.log_softmax(predicts, dim=-1)
    p = torch.nn.functional.softmax(targets, dim=-1)
    per_row = (-p * log_q).sum(dim=-1)
    return per_row.mean()
class BertSelfAttention(nn.Module):
    """Multi-head self-attention with activation quantization and optional
    teacher intervention (TI).

    The query/key/value projections are ``QuantizeLinear`` when both
    ``config.quantize`` and ``config.qkv_q`` are set, plain ``nn.Linear``
    otherwise. Query, key, attention probabilities and value are passed
    through ``SymQuantizer.apply`` with ``input_bits`` (fixed to 8) before
    the two matrix products.

    `teacher_probs`, when given, is indexed as ``teacher_probs[0][i]`` for the
    teacher attention map of layer `i` and ``teacher_probs[1][i][0]`` for the
    teacher context of layer `i`.
    """

    def __init__(self, config, i):
        super(BertSelfAttention, self).__init__()
        hidden = config.hidden_size
        heads = config.num_attention_heads
        if hidden % heads != 0:
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (hidden, heads))
        self.num_attention_heads = heads
        self.attention_head_size = int(hidden / heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size
        self.i = i
        self.config = config
        self.input_bits = 8

        # ---- weight quantization setting ----
        quantize_weights = self.config.quantize and config.qkv_q

        def make_projection(role):
            # One projection per role; the quantized variant carries a name
            # built from the layer index, the class name and the role.
            if quantize_weights:
                return QuantizeLinear(
                    hidden, self.all_head_size, config=config,
                    name=f"layer_{self.i}_{self.__class__.__name__}_{role}")
            return nn.Linear(hidden, self.all_head_size)

        # Creation order (query, key, value) is kept so parameter
        # initialization happens in the same sequence as before.
        self.query = make_projection("query")
        self.key = make_projection("key")
        self.value = make_projection("value")

        # ---- activation quantization setting ----
        self.act_quantizer = SymQuantizer
        for buffer_name in ('clip_query', 'clip_key', 'clip_value', 'clip_attn'):
            self.register_buffer(
                buffer_name, torch.Tensor([-config.clip_val, config.clip_val]))

        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)

    def transpose_for_scores(self, x):
        """Split the last dimension into (heads, head_size) and swap dims 1 and 2.

        For a (batch, seq, hidden) input this yields (batch, heads, seq, head_size).
        """
        split_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
        return x.view(*split_shape).permute(0, 2, 1, 3)

    def forward(self, hidden_states, attention_mask, teacher_probs=None):
        """Run self-attention, optionally substituting teacher signals.

        Returns:
            ``(context, attention_scores, student_attention_probs, raw_context)``
            where `context` is merged back to the hidden size (and is the
            teacher context when TI-C is active), `attention_scores` are the
            scaled scores with the mask added, `student_attention_probs` is
            the student's softmax output before dropout, and `raw_context` is
            the student's per-head context before merging.
        """
        cfg = self.config

        # Stop-gradient selection. With teacher intervention the gradient is
        # stopped for internal distillation: TI-M detaches only the query/key
        # input, TI-C / TI-O detach the input of all three projections.
        if cfg.teacher_attnmap:
            frozen = hidden_states.clone().detach()
            qk_source, v_source = frozen, hidden_states
        elif cfg.teacher_context or cfg.teacher_output:
            frozen = hidden_states.clone().detach()
            qk_source, v_source = frozen, frozen
        else:
            qk_source, v_source = hidden_states, hidden_states

        mixed_query_layer = self.query(qk_source)
        mixed_key_layer = self.key(qk_source)
        mixed_value_layer = self.value(v_source)

        # e.g. batch 16, seq 64, hidden 768, 12 heads:
        # (16, 64, 768) -> (16, 64, 12, 64) -> (16, 12, 64, 64)
        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        quantize = self.act_quantizer.apply
        query_layer = quantize(query_layer, self.clip_query, self.input_bits, True)
        key_layer = quantize(key_layer, self.clip_key, self.input_bits, True)

        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        attention_scores = attention_scores + attention_mask
        student_probs = torch.softmax(attention_scores, dim=-1)

        # Teacher Intervention Map (TI-M): the teacher's attention map replaces
        # the student's for the value mixing; the student's own map is still
        # what gets returned (for internal distillation).
        use_teacher_map = cfg.teacher_attnmap and teacher_probs is not None
        mixing_source = teacher_probs[0][self.i] if use_teacher_map else student_probs
        attention_probs = self.dropout(mixing_source)

        # quantize both attention probs and value layer for dot product
        attention_probs = quantize(attention_probs, self.clip_attn, self.input_bits, True)
        value_layer = quantize(value_layer, self.clip_value, self.input_bits, True)

        student_context = torch.matmul(attention_probs, value_layer)
        context_layer = student_context

        if cfg.teacher_context and teacher_probs is not None:
            # Teacher Intervention Context (TI-C): inserted as a smoothing
            # stage for gradual teacher intervention.
            context_layer = teacher_probs[1][self.i][0]  # TI/CI - layer number - context

        # (batch, heads, seq, head_size) -> (batch, seq, all_head_size)
        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        merged_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*merged_shape)

        return context_layer, attention_scores, student_probs, student_context
class BertSelfOutput(nn.Module):
    """Output stage of the (quantized) self-attention sub-layer.

    Applies a dense projection, optionally swaps in the teacher's
    self-attention output (TI-O), then dropout, a residual addition with the
    sub-layer input and LayerNorm.

    The dense layer is a ``QuantizeLinear`` only when both ``config.quantize``
    and ``config.qkv_q`` are set, the same condition the query/key/value
    projections use; otherwise it is a plain ``nn.Linear``.
    """

    def __init__(self, config, i):
        super(BertSelfOutput, self).__init__()
        self.config = config
        self.i = i
        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(
            config.hidden_size / config.num_attention_heads)

        # Honor the global quantization switch as the sibling modules do; the
        # previous check looked at qkv_q alone and quantized this layer even
        # when quantization was disabled.
        if config.quantize and config.qkv_q:
            self.dense = QuantizeLinear(config.hidden_size, config.hidden_size, config=config)
        else:
            self.dense = nn.Linear(config.hidden_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor, teacher_probs=None):
        """Return ``(normalized_output, student_projection)``.

        Args:
            hidden_states: merged attention context to project.
            input_tensor: sub-layer input used for the residual connection.
            teacher_probs: optional teacher signals; when
                ``config.teacher_output`` is set and this is given,
                ``teacher_probs[1][i][2]`` (the teacher's self-attention
                output for layer `i`) replaces the student projection before
                dropout and LayerNorm.

        `student_projection` is always the student's own dense output.
        """
        hidden_states = self.dense(hidden_states)
        self_output_hs = hidden_states

        # Guard on teacher_probs like the TI-M / TI-C branches do, so a forward
        # pass without teacher signals falls back to the student path instead
        # of failing on ``None[1]``.
        if self.config.teacher_output and teacher_probs is not None:
            hidden_states = teacher_probs[1][self.i][2]  # SA-output

        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states, self_output_hs
class BertOutput(nn.Module):
    """Second half of the feed-forward sub-layer.

    Projects from ``intermediate_size`` back to ``hidden_size``, then applies
    dropout, a residual addition and LayerNorm. The projection is a
    ``QuantizeLinear`` when both ``config.quantize`` and ``config.ffn_q_2``
    are set, a plain ``nn.Linear`` otherwise.
    """

    def __init__(self, config, i):
        super(BertOutput, self).__init__()
        self.i = i
        wide, narrow = config.intermediate_size, config.hidden_size

        if not (config.quantize and config.ffn_q_2):
            self.dense = nn.Linear(wide, narrow)
        else:
            self.dense = QuantizeLinear(
                wide, narrow, config=config,
                name=f"layer_{self.i}_{self.__class__.__name__}")

        self.LayerNorm = nn.LayerNorm(narrow, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states, input_tensor):
        """Return ``LayerNorm(dropout(dense(hidden_states)) + input_tensor)``."""
        projected = self.dropout(self.dense(hidden_states))
        return self.LayerNorm(projected + input_tensor)
class BertPreTrainedModel(nn.Module):
    """Abstract base class handling weight initialisation and checkpoint loading."""

    def __init__(self, config, *inputs, **kwargs):
        super(BertPreTrainedModel, self).__init__()
        self.config = config

    def init_bert_weights(self, module):
        """Initialise the weights of a single sub-module.

        Intended to be used with ``nn.Module.apply``.  Linear and Embedding
        weights are drawn from N(0, ``config.initializer_range``); LayerNorm
        is reset to weight 1 / bias 0; Linear biases are zeroed.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(
                mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        """Instantiate a model and load weights from a directory or a state dict.

        Params:
            pretrained_model_name_or_path: directory containing the
                configuration file (``CONFIG_NAME``) and the weight file
                (``WEIGHTS_NAME``).  Only read when ``config`` and/or
                ``state_dict`` are not supplied.
            state_dict: optional state dictionary to load instead of the
                weight file.  It is copied before being modified, so the
                caller's object is left untouched.
            config: optional ready-made configuration instance.
            *inputs, **kwargs: forwarded to the model constructor
                (ex: num_labels for BertForSequenceClassification).

        Returns:
            The instantiated model with every matching weight loaded.
            Missing keys, unexpected keys and loader errors do not abort
            loading; they are reported through the ``logging`` module.
        """
        import logging  # local import: keeps this block self-contained

        log = logging.getLogger(__name__)

        state_dict = kwargs.pop('state_dict', None)
        config = kwargs.pop('config', None)

        if config is None:
            # Load config
            config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
            config = BertConfig.from_json_file(config_file)

        # Instantiate model.
        model = cls(config, *inputs, **kwargs)
        if state_dict is None:
            weights_path = os.path.join(
                pretrained_model_name_or_path, WEIGHTS_NAME)
            state_dict = torch.load(weights_path, map_location='cpu')

        # Work on a copy: the key renaming below and _load_from_state_dict
        # may modify the mapping, and the caller's dict must stay intact.
        metadata = getattr(state_dict, '_metadata', None)
        state_dict = state_dict.copy()
        if metadata is not None:
            state_dict._metadata = metadata

        # Translate TF-style LayerNorm names (gamma/beta) to PyTorch names.
        # When a key contains both substrings, only 'beta' is replaced
        # (the later rule wins), as in the original implementation.
        renames = []
        for key in state_dict.keys():
            new_key = None
            if 'gamma' in key:
                new_key = key.replace('gamma', 'weight')
            if 'beta' in key:
                new_key = key.replace('beta', 'bias')
            if new_key:
                renames.append((key, new_key))
        for old_key, new_key in renames:
            state_dict[new_key] = state_dict.pop(old_key)

        missing_keys = []
        unexpected_keys = []
        error_msgs = []

        def load(module, prefix=''):
            # Recursively load each sub-module with its dotted prefix.
            local_metadata = {} if metadata is None else metadata.get(
                prefix[:-1], {})
            module._load_from_state_dict(
                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')

        # A bare encoder (no ``bert`` attribute) loading a checkpoint saved
        # from a task model must skip the leading 'bert.' in the keys.
        start_prefix = ''
        if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()):
            start_prefix = 'bert.'

        load(model, prefix=start_prefix)

        # Surface what the loader found instead of dropping it silently.
        model_name = model.__class__.__name__
        if missing_keys:
            log.info("Weights of %s not initialized from pretrained model: %s",
                     model_name, missing_keys)
        if unexpected_keys:
            log.info("Weights from pretrained model not used in %s: %s",
                     model_name, unexpected_keys)
        if error_msgs:
            log.warning("Error(s) in loading state_dict for %s:\n\t%s",
                        model_name, "\n\t".join(error_msgs))

        return model
class _LRSchedule(ABC):
    """Base class of every learning-rate schedule in this module.

    Subclasses implement ``get_lr_`` which maps training progress to a
    learning-rate multiplier; ``get_lr`` converts a step count to progress.
    """

    # Set to True by schedules for which progressing beyond t_total steps
    # doesn't make sense.
    warn_t_total = False

    def __init__(self, warmup=0.002, t_total=-1, **kw):
        """
        :param warmup: what fraction of t_total steps will be used for linear warmup
                       (-1 is accepted and stored as 0)
        :param t_total: how many training steps (updates) are planned;
                        a negative value disables the schedule
        :param kw: forwarded to the parent constructor
        :raises ValueError: if warmup is neither -1 nor inside [0.0, 1.0[
        """
        super(_LRSchedule, self).__init__(**kw)
        if t_total < 0:
            logger.warning("t_total value of {} results in schedule not being applied".format(t_total))
        warmup_ok = warmup == -1 or 0.0 <= warmup < 1.0
        if not warmup_ok:
            raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup))
        self.warmup = float(max(warmup, 0.))
        self.t_total = float(t_total)
        self.warned_for_t_total_at_progress = -1

    def get_lr(self, step, nowarn=False):
        """
        :param step: which of t_total steps we're on
        :param nowarn: set to True to skip the bookkeeping for training beyond 't_total' steps
        :return: learning rate multiplier for current update (1. when t_total < 0)
        """
        if self.t_total < 0:
            return 1.
        progress = float(step) / self.t_total
        multiplier = self.get_lr_(progress)
        # Only the furthest overshoot is recorded; no warning is emitted.
        overshoot = progress > 1. and progress > self.warned_for_t_total_at_progress
        if self.warn_t_total and not nowarn and overshoot:
            self.warned_for_t_total_at_progress = progress
        return multiplier

    @abc.abstractmethod
    def get_lr_(self, progress):
        """
        :param progress: value between 0 and 1 (unless going beyond t_total steps) specifying training progress
        :return: learning rate multiplier for current update
        """
        return 1.
class WarmupCosineSchedule(_LRSchedule):
    """
    Linear warmup from 0 to 1 over the first `warmup` fraction of training,
    followed by a cosine curve over the remaining `1 - warmup` fraction.
    With the default `cycles=0.5` the cosine decays from 1. to 0.; other
    values follow the cosine for `cycles` full periods after warmup.
    """
    warn_t_total = True

    def __init__(self, warmup=0.002, t_total=-1, cycles=.5, **kw):
        """
        :param warmup: see LRSchedule
        :param t_total: see LRSchedule
        :param cycles: number of cosine cycles after warmup. Default: 0.5,
                       i.e. 1. at progress==warmup and 0 at progress==1.
        :param kw: forwarded to the parent constructor
        """
        super(WarmupCosineSchedule, self).__init__(warmup=warmup, t_total=t_total, **kw)
        self.cycles = cycles

    def get_lr_(self, progress):
        """Return the multiplier for `progress` (fraction of t_total done)."""
        if progress < self.warmup:
            return progress / self.warmup
        # Rescale so that 0 marks the end of warmup and 1 the end of training.
        after_warmup = (progress - self.warmup) / (1 - self.warmup)
        return 0.5 * (1. + math.cos(math.pi * self.cycles * 2 * after_warmup))
class BertAdam(Optimizer):
    """BERT flavour of Adam: decoupled weight decay and no bias correction.

    Params:
        lr: learning rate
        warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
        t_total: total number of training steps for the learning
            rate schedule, -1 means constant learning rate of 1. (no warmup regardless of warmup setting). Default: -1
        schedule: schedule to use for the warmup.
            Can be `'warmup_linear'`, `'warmup_constant'`, `'warmup_cosine'`, `'none'`, `None` or a `_LRSchedule` object.
            If `None` or `'none'`, learning rate is always kept constant.
            Default : `'warmup_linear'`
        b1: Adams b1. Default: 0.9
        b2: Adams b2. Default: 0.999
        e: Adams epsilon. Default: 1e-6
        weight_decay: Weight decay. Default: 0.01
        max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
    """

    def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear',
                 b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, max_grad_norm=1.0, **kwargs):
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
        schedule_is_object = isinstance(schedule, _LRSchedule)
        if not schedule_is_object and schedule not in SCHEDULES:
            raise ValueError("Invalid schedule parameter: {}".format(schedule))
        if not 0.0 <= b1 < 1.0:
            raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1))
        if not 0.0 <= b2 < 1.0:
            raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2))
        if not e >= 0.0:
            raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e))

        # Resolve the schedule: either build one from its registered name or
        # use the object as given (its own warmup/t_total then apply).
        if schedule_is_object:
            if warmup != -1 or t_total != -1:
                logger.warning("warmup and t_total on the optimizer are ineffective when _LRSchedule object is provided as schedule. "
                               "Please specify custom warmup and t_total in _LRSchedule object.")
        else:
            schedule = SCHEDULES[schedule](warmup=warmup, t_total=t_total)

        defaults = dict(lr=lr, schedule=schedule,
                        b1=b1, b2=b2, e=e, weight_decay=weight_decay,
                        max_grad_norm=max_grad_norm)
        super(BertAdam, self).__init__(params, defaults)

    def get_lr(self):
        """Return the scheduled learning rate of every parameter.

        Returns ``[0]`` as soon as a parameter without optimizer state is
        met (i.e. before it has been stepped).
        """
        rates = []
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                if len(state) == 0:
                    return [0]
                lr_scheduled = group['lr']
                lr_scheduled *= group['schedule'].get_lr(state['step'])
                rates.append(lr_scheduled)
        return rates

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.

        Returns:
            The value returned by ``closure``, or ``None`` without a closure.
        """
        loss = closure() if closure is not None else None

        for group in self.param_groups:
            beta1, beta2 = group['b1'], group['b2']
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')

                state = self.state[p]
                if len(state) == 0:
                    # Lazy state initialisation on the first step.
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['next_m'] = torch.zeros_like(p.data)
                    # Exponential moving average of squared gradient values
                    state['next_v'] = torch.zeros_like(p.data)
                next_m = state['next_m']
                next_v = state['next_v']

                # Gradient clipping is applied per parameter tensor.
                if group['max_grad_norm'] > 0:
                    clip_grad_norm_(p, group['max_grad_norm'])

                # Update both running averages in place.
                next_m.mul_(beta1).add_(grad, alpha=1 - beta1)
                next_v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                update = next_m / (next_v.sqrt() + group['e'])

                # Weight decay is added to the update rather than to the
                # loss so that it does not interact with the m/v averages
                # (equivalent to L2 regularisation under plain SGD).
                if group['weight_decay'] > 0.0:
                    update += group['weight_decay'] * p.data

                lr_scheduled = group['lr']
                lr_scheduled *= group['schedule'].get_lr(state['step'])

                p.data.add_(-(lr_scheduled * update))

                # No bias correction is applied in this variant.
                state['step'] += 1

        return loss
class BertTokenizer(object):
    """Runs end-to-end tokenization: punctuation splitting + wordpiece"""

    def __init__(self, vocab_file, do_lower_case=True, max_len=None, do_basic_tokenize=True, basic_only=False,
                 never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
        """Constructs a BertTokenizer.

        Args:
          vocab_file: Path to a one-wordpiece-per-line vocabulary file
          do_lower_case: Whether to lower case the input
                         Only has an effect when do_basic_tokenize=True
          max_len: Maximum sequence length used for the length warning in
                         `convert_tokens_to_ids`; `None` means no practical limit.
          do_basic_tokenize: Whether to do basic tokenization before wordpiece.
          basic_only: If True (and do_basic_tokenize is True), return the
                         basic tokens without applying wordpiece.
          never_split: List of tokens which will never be split during tokenization.
                         Only has an effect when do_basic_tokenize=True

        Raises:
          ValueError: if `vocab_file` is not an existing file.
        """
        if not os.path.isfile(vocab_file):
            raise ValueError(
                "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
                "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))

        self.vocab = load_vocab(vocab_file)
        self.ids_to_tokens = collections.OrderedDict(
            [(ids, tok) for tok, ids in self.vocab.items()])
        self.do_basic_tokenize = do_basic_tokenize
        if do_basic_tokenize:
            self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case,
                                                  never_split=never_split)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
        self.max_len = max_len if max_len is not None else int(1e12)
        self.basic_only = basic_only

    def tokenize(self, text):
        """Split `text` into tokens (basic tokens and/or wordpieces)."""
        if not self.do_basic_tokenize:
            return self.wordpiece_tokenizer.tokenize(text)

        split_tokens = []
        for token in self.basic_tokenizer.tokenize(text):
            if self.basic_only:
                split_tokens.append(token)
            else:
                split_tokens.extend(self.wordpiece_tokenizer.tokenize(token))
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        """Converts a sequence of tokens into ids using the vocab.

        Unknown tokens map to the id of '[UNK]'.  A warning is logged when
        the result is longer than `max_len`; the ids are not truncated.
        """
        ids = [self.vocab.get(token, self.vocab['[UNK]']) for token in tokens]
        if len(ids) > self.max_len:
            logger.warning(
                "Token indices sequence length is longer than the specified maximum "
                " sequence length for this BERT model ({} > {}). Running this"
                " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
            )
        return ids

    def convert_ids_to_tokens(self, ids):
        """Converts a sequence of ids in wordpiece tokens using the vocab."""
        return [self.ids_to_tokens[i] for i in ids]

    def save_vocabulary(self, vocab_path):
        """Save the tokenizer vocabulary to a directory or file.

        If `vocab_path` is an existing directory the file is written inside
        it under the default vocabulary file name; otherwise `vocab_path`
        itself is used as the output file.  Returns the path written.
        """
        if os.path.isdir(vocab_path):
            vocab_file = os.path.join(vocab_path, VOCAB_NAME)
        else:
            # Previously this case left `vocab_file` unbound and crashed.
            vocab_file = vocab_path

        index = 0
        with open(vocab_file, "w", encoding="utf-8") as writer:
            for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
                if index != token_index:
                    logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive."
                                   " Please check that the vocabulary is not corrupted!".format(vocab_file))
                    index = token_index
                writer.write(token + u'\n')
                index += 1
        return vocab_file

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
        """
        Instantiate a tokenizer from the vocabulary file found in the
        directory `pretrained_model_name_or_path`.

        `max_len` (if given) is capped at 512; a missing or `None` value
        results in 512.
        """
        resolved_vocab_file = os.path.join(pretrained_model_name_or_path, VOCAB_NAME)

        max_len_cap = 512
        requested = kwargs.get('max_len')
        kwargs['max_len'] = max_len_cap if requested is None else min(requested, max_len_cap)
        # Instantiate tokenizer.
        return cls(resolved_vocab_file, *inputs, **kwargs)
+ """ + self.do_lower_case = do_lower_case + self.never_split = never_split + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = self._clean_text(text) + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case and token not in self.never_split: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + if text in self.never_split: + return [text] + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + 
class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """Tokenizes a piece of text into its word pieces.

        Each whitespace-separated word is split greedily, always taking the
        longest vocabulary entry that matches at the current position;
        pieces after the first carry a "##" prefix.

        For example:
          input = "unaffable"
          output = ["un", "##aff", "##able"]

        A word longer than `max_input_chars_per_word`, or one that cannot be
        covered completely by vocabulary entries, yields a single `unk_token`.

        Args:
          text: A single token or whitespace separated tokens.

        Returns:
          A list of wordpiece tokens.
        """
        pieces = []
        # str.split() without arguments strips the text and drops empty
        # fields, so blank input produces no words.
        for word in text.split():
            length = len(word)
            if length > self.max_input_chars_per_word:
                pieces.append(self.unk_token)
                continue

            word_pieces = []
            begin = 0
            while begin < length:
                # Try the longest remaining substring first, then shrink.
                for stop in range(length, begin, -1):
                    candidate = word[begin:stop]
                    if begin > 0:
                        candidate = "##" + candidate
                    if candidate in self.vocab:
                        word_pieces.append(candidate)
                        begin = stop
                        break
                else:
                    # Nothing matched at this position: the word is unknown.
                    word_pieces = None
                    break

            if word_pieces is None:
                pieces.append(self.unk_token)
            else:
                pieces.extend(word_pieces)
        return pieces
class SymQuantizer(torch.autograd.Function):
    """
    Symmetric uniform quantization with a straight-through gradient.
    """

    @staticmethod
    def forward(ctx, input, clip_val=2.5, num_bits=2, layerwise=False):
        """
        :param ctx: autograd context
        :param input: tensor to be quantized
        :param clip_val: tensor holding (lower, upper) clipping bounds applied
                         before quantization.  It is indexed and saved for
                         backward, so it must be a tensor; the scalar default
                         is not usable as-is.
        :param num_bits: number of bits of the quantized representation
        :param layerwise: if True a single scale is used for the whole tensor;
                          otherwise one scale per last-dimension row (<= 3-D
                          input) or per leading (dim0, dim1) slice (4-D input)
        :return: quantized tensor, same shape as `input`
        :raises ValueError: for inputs with more than 4 dimensions when
                            `layerwise` is False
        """
        ctx.save_for_backward(input, clip_val)
        input = torch.where(input < clip_val[1], input, clip_val[1])
        input = torch.where(input > clip_val[0], input, clip_val[0])

        # NOTE: dynamic scaling -- the scale follows the largest magnitude.
        if layerwise:
            max_abs = torch.max(torch.abs(input))
        elif input.ndimension() <= 3:
            # weight & hidden layer
            max_abs = torch.max(torch.abs(input), dim=-1, keepdim=True)[0]
        elif input.ndimension() == 4:
            # attention score matrix: one scale per (dim0, dim1) slice
            flat = input.view(input.shape[0], input.shape[1], -1)
            max_abs = torch.max(torch.abs(flat), dim=-1, keepdim=True)[0].unsqueeze(-1)
        else:
            raise ValueError(
                "SymQuantizer supports at most 4 dimensions when layerwise=False, "
                "got {}".format(input.ndimension()))
        max_abs = max_abs.detach()

        # An all-zero tensor/slice has max_abs == 0, which would give an
        # infinite scale and 0 * inf = NaN.  Any finite scale maps zeros to
        # zeros, so substitute 1 there; non-zero slices are unaffected.
        max_abs = torch.where(max_abs > 0, max_abs, torch.ones_like(max_abs))

        s = (2 ** (num_bits - 1) - 1) / max_abs
        output = torch.round(input * s).div(s)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param ctx: saved non-clipped full-precision tensor and clip_val
        :param grad_output: gradient wrt the quantized tensor
        :return: estimated gradient wrt the full-precision tensor
                 (zero where the input was at or beyond a clipping bound)
        """
        input, clip_val = ctx.saved_tensors  # unclipped input
        grad_input = grad_output.clone()
        grad_input[input.ge(clip_val[1])] = 0
        grad_input[input.le(clip_val[0])] = 0
        return grad_input, None, None, None
input.min(dim=-1, keepdim=True)[0].expand_as(input).detach() + elif input.ndimension() == 4: + # TODO: attention score matrix, calculate alpha / beta per head + tmp = input.view(input.shape[0], input.shape[1], -1) + alpha = (tmp.max(dim=-1, keepdim=True)[0].unsqueeze(-1) - \ + tmp.min(dim=-1, keepdim=True)[0].unsqueeze(-1)).expand_as(input).detach() + beta = tmp.min(dim=-1, keepdim=True)[0].unsqueeze(-1).expand_as(input).detach() + else: + raise ValueError + input_normalized = (input - beta) / (alpha + 1e-8) + s = (2**num_bits - 1) + quant_input = torch.round(input_normalized * s).div(s) + output = quant_input * (alpha + 1e-8) + beta + + + return output + + @staticmethod + def backward(ctx, grad_output): + """ + :param ctx: saved non-clipped full-precision tensor and clip_val + :param grad_output: gradient ert the quantized tensor + :return: estimated gradient wrt the full-precision tensor + """ + input, clip_val = ctx.saved_tensors # unclipped input + grad_input = grad_output.clone() + grad_input[input.ge(clip_val[1])] = 0 + grad_input[input.le(clip_val[0])] = 0 + return grad_input, None, None, None + + +class TwnQuantizer(torch.autograd.Function): + """Ternary Weight Networks (TWN) + Ref: https://arxiv.org/abs/1605.04711 + """ + @staticmethod + def forward(ctx, input, clip_val, num_bits, layerwise): + """ + :param input: tensor to be ternarized + :return: quantized tensor + """ + mean_scale = 0.7 + + ctx.save_for_backward(input, clip_val) + + input = torch.where(input < clip_val[1], input, clip_val[1]) + input = torch.where(input > clip_val[0], input, clip_val[0]) + + if layerwise: + m = input.norm(p=1).div(input.nelement()) + thres = mean_scale * m + pos = (input > thres).float() + neg = (input < -thres).float() + mask = (input.abs() > thres).float() + alpha = (mask * input).abs().sum() / mask.sum() + result = alpha * pos - alpha * neg + else: # row-wise only for embed / weight + n = input[0].nelement() + m = input.data.norm(p=1, dim=1).div(n) + thres = 
(mean_scale * m).view(-1, 1).expand_as(input) + pos = (input > thres).float() + neg = (input < -thres).float() + mask = (input.abs() > thres).float() + alpha = ((mask * input).abs().sum(dim=1) / mask.sum(dim=1)).view(-1, 1) + result = alpha * pos - alpha * neg + + return result + + @staticmethod + def backward(ctx, grad_output): + """ + :param ctx: saved non-clipped full-precision tensor and clip_val + :param grad_output: gradient ert the quantized tensor + :return: estimated gradient wrt the full-precision tensor + """ + input, clip_val = ctx.saved_tensors # unclipped input + grad_input = grad_output.clone() + grad_input[input.ge(clip_val[1])] = 0 + grad_input[input.le(clip_val[0])] = 0 + return grad_input, None, None, None + + +class QuantizeLinear(nn.Linear): + + def __init__(self, *kargs,bias=True, config = None, map=False, name=None): + super(QuantizeLinear, self).__init__(*kargs,bias=True) + self.weight_bits = 2 + self.input_bits= 8 + self.mean_scale = config.mean_scale + + self.name = name + self.map = map + self.config = config + + self.weight_quantizer = TwnQuantizer + # Weight & Activation Quantization Setting + self.act_quantizer = SymQuantizer + self.register_buffer('act_clip_val', torch.tensor([-config.clip_val, config.clip_val])) + self.register_buffer('weight_clip_val', torch.tensor([-config.clip_val, config.clip_val]))\ + + def forward(self, input): + # quantize weight + weight = self.weight_quantizer.apply(self.weight, self.weight_clip_val, self.weight_bits, True) + q_input = self.act_quantizer.apply(input, self.act_clip_val, self.input_bits, True) + + # nn.Linear w/ Quantized input and output + out = nn.functional.linear(q_input, weight) + + if not self.bias is None: + out += self.bias.view(1, -1).expand_as(out) + + return out + +class QuantizeEmbedding(nn.Embedding): + + def __init__(self, *kargs,padding_idx=None, config = None): + super(QuantizeEmbedding, self).__init__(*kargs, padding_idx = padding_idx) + self.weight_bits = 2 + self.layerwise = 
False + self.mean_scale = config.mean_scale + self.config = config + + self.weight_quantizer = TwnQuantizer + self.register_buffer('weight_clip_val', torch.tensor([-config.clip_val, config.clip_val])) + + def forward(self, input): + + weight = self.weight_quantizer.apply(self.weight, self.weight_clip_val, self.weight_bits, self.layerwise) + + out = nn.functional.embedding( + input, weight, self.padding_idx, self.max_norm, + self.norm_type, self.scale_grad_by_freq, self.sparse) + return out diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..4ab7942 --- /dev/null +++ b/utils.py @@ -0,0 +1,97 @@ +#* +# @file Different utility functions +# Copyright (c) Zhewei Yao, Amir Gholami +# All rights reserved. +# This file is part of PyHessian library. +# +# PyHessian is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PyHessian is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PyHessian. If not, see . 
#*

import torch
import math
from torch.autograd import Variable
import numpy as np


def group_product(xs, ys):
    """
    the inner product of two lists of variables xs,ys
    :param xs: list of tensors
    :param ys: list of tensors, paired element-wise with xs
    :return: sum over all pairs of torch.sum(x * y); the integer 0 when the
        lists are empty
    """
    return sum(torch.sum(x * y) for (x, y) in zip(xs, ys))


def group_add(params, update, alpha=1):
    """
    params = params + update*alpha (performed in place on each params[i].data)
    :param params: list of variable
    :param update: list of data
    :param alpha: scalar multiplier applied to every update entry
    :return: the same params list
    """
    for i, param in enumerate(params):
        param.data.add_(update[i] * alpha)
    return params


def normalization(v):
    """
    normalization of a list of vectors
    return: normalized vectors v
    """
    s = group_product(v, v) ** 0.5
    # group_product yields a plain number for an empty list, which has no
    # .cpu(); only tensors need to be moved and unwrapped.
    s = s.cpu().item() if torch.is_tensor(s) else float(s)
    # 1e-6 guards against division by zero for an all-zero list
    v = [vi / (s + 1e-6) for vi in v]
    return v


def get_params_grad(model):
    """
    get model parameters and corresponding gradients

    Parameters with requires_grad == False are skipped; a parameter without a
    gradient contributes 0., otherwise a copy (grad + 0.) is stored.
    """
    params = []
    grads = []
    for param in model.parameters():
        if not param.requires_grad:
            continue
        params.append(param)
        grads.append(0. if param.grad is None else param.grad + 0.)
    return params, grads


def hessian_vector_product(gradsH, params, v):
    """
    compute the hessian vector product of Hv, where
    gradsH is the gradient at the current point,
    params is the corresponding variables,
    v is the vector.
    """
    # retain_graph=True keeps the graph of gradsH for further products
    hv = torch.autograd.grad(gradsH,
                             params,
                             grad_outputs=v,
                             only_inputs=True,
                             retain_graph=True)
    return hv


def orthnormal(w, v_list):
    """
    make vector w orthogonal to each vector in v_list.
    afterwards, normalize the output w

    w is updated in place by group_add before being normalized.
    """
    for v in v_list:
        w = group_add(w, v, alpha=-group_product(w, v))
    return normalization(w)


# diff --git a/utils_glue.py b/utils_glue.py new file mode 100644 index 0000000..a9cce10 --- /dev/null +++ b/utils_glue.py @@ -0,0 +1,617 @@
import os
import logging
import sys
import csv

from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import matthews_corrcoef, f1_score

logger = logging.getLogger()


class InputExample(object):
    """A single training/test example for simple sequence classification."""

    def __init__(self, guid, text_a, text_b=None, label=None):
        """Constructs a InputExample.

        Args:
            guid: Unique id for the example.
            text_a: string. The untokenized text of the first sequence. For single
            sequence tasks, only this sequence must be specified.
            text_b: (Optional) string. The untokenized text of the second sequence.
            Only must be specified for sequence pair tasks.
            label: (Optional) string. The label of the example. This should be
            specified for train and dev examples, but not for test examples.
        """
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label


class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids, label_id, seq_length=None):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.seq_length = seq_length
        self.label_id = label_id


class DataProcessor(object):
    """Base class for data converters for sequence classification data sets."""

    def get_train_examples(self, data_dir):
        """Gets a collection of `InputExample`s for the train set."""
        raise NotImplementedError()

    def get_dev_examples(self, data_dir):
        """Gets a collection of `InputExample`s for the dev set."""
        raise NotImplementedError()

    def get_test_examples(self, data_dir):
        """Gets a collection of `InputExample`s for the test set."""
        raise NotImplementedError()

    def get_labels(self):
        """Gets the list of labels for this data set."""
        raise NotImplementedError()

    @classmethod
    def _read_tsv(cls, input_file, quotechar=None):
        """Reads a tab separated value file.

        Returns a list of rows, each a list of cell strings. A parse error
        terminates the program via ``sys.exit`` with the file name, the line
        number and the parser's message.
        """
        with open(input_file, "r", newline='', encoding="utf-8") as f:
            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
            lines = []
            try:
                for line in reader:
                    lines.append(line)
            except csv.Error as err:
                # Only parse errors are turned into an exit; anything else
                # (e.g. KeyboardInterrupt) propagates unchanged.
                sys.exit('file %s, line %d: %s' % (input_file, reader.line_num, err))
        return lines


class MrpcProcessor(DataProcessor):
    """Processor for the MRPC data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir, aug_N):
        """Reads the augmented training file ``train_aug_{aug_N}.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, f"train_aug_{aug_N}.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    def _create_examples(self, lines, set_type):
        """Creates examples from TSV rows; the first row (header) is skipped.

        Sentences are read from columns 3 and 4 and the label from column 0;
        the label is None for the "test" set.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:
                continue
            label = None if set_type == 'test' else line[0]
            examples.append(
                InputExample(guid="%s-%s" % (set_type, i),
                             text_a=line[3], text_b=line[4], label=label))
        return examples


class MnliProcessor(DataProcessor):
    """Processor for the MultiNLI data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test")

    def get_aug_examples(self, data_dir):
        """Reads the augmented training file ``train_aug.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["contradiction", "entailment", "neutral"]

    def _create_examples(self, lines, set_type):
        """Creates examples from TSV rows; the first row (header) is skipped.

        The guid uses column 0, sentences come from columns 8 and 9 and the
        label from the last column; the label is None for the "test" set.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:
                continue
            label = None if set_type == 'test' else line[-1]
            examples.append(
                InputExample(guid="%s-%s" % (set_type, line[0]),
                             text_a=line[8], text_b=line[9], label=label))
        return examples
def _pair_examples(lines, set_type, col_a, col_b, unlabeled=("test",)):
    """Build sentence-pair examples from TSV rows, skipping the header row.

    The guid is "<set_type>-<column 0>", the two sentences are read from
    ``col_a`` and ``col_b`` and the label from the last column. Sets listed
    in ``unlabeled`` get ``label=None``.
    """
    examples = []
    for (i, line) in enumerate(lines):
        if i == 0:
            continue
        label = None if set_type in unlabeled else line[-1]
        examples.append(
            InputExample(guid="%s-%s" % (set_type, line[0]),
                         text_a=line[col_a], text_b=line[col_b], label=label))
    return examples


class MnliMismatchedProcessor(MnliProcessor):
    """Processor for the MultiNLI Mismatched data set (GLUE version)."""

    def get_dev_examples(self, data_dir):
        """See base class."""
        # NOTE(review): the guid prefix stays "dev_matched" although the rows
        # come from dev_mismatched.tsv; kept unchanged in case guids are
        # compared elsewhere.
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test_mismatched.tsv")),
            "test")


class ColaProcessor(DataProcessor):
    """Processor for the CoLA data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir, aug_N):
        """Reads the augmented training file ``train_aug_{aug_N}.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, f"train_aug_{aug_N}.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    def _create_examples(self, lines, set_type):
        """Creates single-sentence examples from TSV rows.

        For the "test" set the first row is skipped and the sentence is read
        from column 1 with no label. For every other set no row is skipped;
        the sentence comes from column 3 and the label from column 1.
        """
        is_test = set_type == 'test'
        examples = []
        for (i, line) in enumerate(lines):
            if is_test and i == 0:
                continue
            text_a = line[1] if is_test else line[3]
            label = None if is_test else line[1]
            examples.append(
                InputExample(guid="%s-%s" % (set_type, i),
                             text_a=text_a, text_b=None, label=label))
        return examples


class Sst2Processor(DataProcessor):
    """Processor for the SST-2 data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir):
        """Reads the augmented training file ``train_aug.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    def _create_examples(self, lines, set_type):
        """Creates single-sentence examples; the first row is skipped.

        "test" rows carry the sentence in column 1 and no label; other rows
        carry the sentence in column 0 and the label in column 1.
        """
        is_test = set_type == 'test'
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:
                continue
            text_a = line[1] if is_test else line[0]
            label = None if is_test else line[1]
            examples.append(
                InputExample(guid="%s-%s" % (set_type, i),
                             text_a=text_a, text_b=None, label=label))
        return examples


class StsbProcessor(DataProcessor):
    """Processor for the STS-B data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir, aug_N):
        """Reads the augmented training file ``train_aug_{aug_N}.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, f"train_aug_{aug_N}.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return [None]

    def _create_examples(self, lines, set_type):
        """Creates pair examples from columns 7 and 8; label is the last column."""
        return _pair_examples(lines, set_type, 7, 8)


class QqpProcessor(DataProcessor):
    """Processor for the QQP data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir):
        """Reads the augmented training file ``train_aug.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    def _create_examples(self, lines, set_type):
        """Creates pair examples; the first row is skipped.

        "test" rows use columns 1 and 2 with no label; other rows use columns
        3 and 4 with the label in column 5. Rows that are too short for these
        columns (including blank rows) are skipped.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:
                continue
            try:
                # guid is built inside the guard so a blank row is skipped
                # like any other short row instead of raising IndexError
                guid = "%s-%s" % (set_type, line[0])
                if set_type == 'test':
                    text_a, text_b, label = line[1], line[2], None
                else:
                    text_a, text_b, label = line[3], line[4], line[5]
            except IndexError:
                continue
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples


class QnliProcessor(DataProcessor):
    """Processor for the QNLI data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir):
        """Reads the augmented training file ``train_aug.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train_aug.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["entailment", "not_entailment"]

    def _create_examples(self, lines, set_type):
        """Creates pair examples from columns 1 and 2; label is the last column."""
        return _pair_examples(lines, set_type, 1, 2)


class RteProcessor(DataProcessor):
    """Processor for the RTE data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_aug_examples(self, data_dir, aug_N):
        """Reads the augmented training file ``train_aug_{aug_N}.tsv``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, f"train_aug_{aug_N}.tsv")), "aug")

    def get_labels(self):
        """See base class."""
        return ["entailment", "not_entailment"]

    def _create_examples(self, lines, set_type):
        """Creates pair examples from columns 1 and 2; label is the last column."""
        return _pair_examples(lines, set_type, 1, 2)


class WnliProcessor(DataProcessor):
    """Processor for the WNLI data set (GLUE version)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    def _create_examples(self, lines, set_type):
        """Creates pair examples from columns 1 and 2.

        The last column is always used as the label, whatever ``set_type`` is.
        """
        return _pair_examples(lines, set_type, 1, 2, unlabeled=())


def convert_examples_to_features(examples, label_list, max_seq_length,
                                 tokenizer, output_mode):
    """Loads a data file into a list of `InputBatch`s.

    Each example is tokenized, truncated to fit ``max_seq_length`` (including
    the [CLS]/[SEP] markers), converted to ids and zero-padded.

    :param examples: sequence of ``InputExample``
    :param label_list: labels; a label's position is its class id
    :param max_seq_length: length of every returned id/mask/segment list
    :param tokenizer: object with ``tokenize`` and ``convert_tokens_to_ids``
    :param output_mode: "classification" or "regression"
    :return: list of ``InputFeatures``
    :raises KeyError: if ``output_mode`` is neither of the two modes
    """
    # Validate once up front: previously this KeyError was raised inside the
    # per-example try block and silently swallowed by a bare except.
    if output_mode not in ("classification", "regression"):
        raise KeyError(output_mode)
    is_classification = output_mode == "classification"

    label_map = {label: i for i, label in enumerate(label_list)}

    features = []
    for (ex_index, example) in enumerate(examples):
        if ex_index % 10000 == 0:
            logger.info("Writing example %d of %d", ex_index, len(examples))
        tokens_a = tokenizer.tokenize(example.text_a)

        tokens_b = None
        if example.text_b:
            tokens_b = tokenizer.tokenize(example.text_b)
            # room for [CLS], [SEP], [SEP]
            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        elif len(tokens_a) > max_seq_length - 2:
            # room for [CLS] and [SEP]
            tokens_a = tokens_a[:(max_seq_length - 2)]

        tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
        segment_ids = [0] * len(tokens)

        if tokens_b:
            tokens += tokens_b + ["[SEP]"]
            segment_ids += [1] * (len(tokens_b) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        input_mask = [1] * len(input_ids)
        seq_length = len(input_ids)

        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        try:
            if is_classification:
                label_id = label_map[example.label]
            else:
                label_id = float(example.label)
        except (KeyError, TypeError, ValueError):
            # Missing or unparsable label (e.g. None on test sets): fall back
            # to 0 as before.
            label_id = 0

        if ex_index < 1:
            logger.info("*** Example ***")
            logger.info("guid: %s", example.guid)
            logger.info("tokens: %s", " ".join([str(x) for x in tokens]))
            logger.info("input_ids: %s", " ".join([str(x) for x in input_ids]))
            logger.info("input_mask: %s", " ".join([str(x) for x in input_mask]))
            logger.info("segment_ids: %s", " ".join([str(x) for x in segment_ids]))
            logger.info("label: %s", example.label)
            logger.info("label_id: %s", label_id)

        features.append(
            InputFeatures(input_ids=input_ids,
                          input_mask=input_mask,
                          segment_ids=segment_ids,
                          label_id=label_id,
                          seq_length=seq_length))
    return features


def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place to the maximum length.

    One token at a time is removed from the end of the longer list
    (``tokens_b`` on ties) until the combined length fits.
    """
    while len(tokens_a) + len(tokens_b) > max_length:
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()


def simple_accuracy(preds, labels):
    """Return the mean of the element-wise ``preds == labels`` comparison."""
    return (preds == labels).mean()


def acc_and_f1(preds, labels):
    """Return accuracy, F1 and their average in a dict."""
    acc = simple_accuracy(preds, labels)
    f1 = f1_score(y_true=labels, y_pred=preds)
    return {
        "acc": acc,
        "f1": f1,
        "acc_and_f1": (acc + f1) / 2,
    }


def pearson_and_spearman(preds, labels):
    """Return Pearson and Spearman correlations and their average in a dict."""
    pearson_corr = pearsonr(preds, labels)[0]
    spearman_corr = spearmanr(preds, labels)[0]
    return {
        "pearson": pearson_corr,
        "spearmanr": spearman_corr,
        "corr": (pearson_corr + spearman_corr) / 2,
    }


def compute_metrics(task_name, preds, labels):
    """Compute the metric dict for a GLUE task.

    :param task_name: lower-case task name, e.g. "cola", "sts-b", "mnli-mm"
    :param preds: predictions, same length as ``labels``
    :param labels: gold labels
    :raises KeyError: for an unknown ``task_name``
    """
    assert len(preds) == len(labels)
    if task_name == "cola":
        return {"mcc": matthews_corrcoef(labels, preds)}
    if task_name in ("mrpc", "qqp"):
        return acc_and_f1(preds, labels)
    if task_name == "sts-b":
        return pearson_and_spearman(preds, labels)
    if task_name in ("sst-2", "mnli", "mnli-mm", "qnli", "rte", "wnli"):
        return {"acc": simple_accuracy(preds, labels)}
    raise KeyError(task_name)