quantize_onnx_model.py
import os
import urllib.request
import zipfile
from typing import Iterable, List, Optional, Tuple

import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset

from datasets.imagenet_util import evaluate_ppq_module_with_imagenet
from ppq import QuantizationSetting, QuantizationSettingFactory
from ppq.api import espdl_quantize_onnx, get_target_platform


def quant_setting_mobilenet_v2(
    onnx_path: str,
    optim_quant_method: Optional[List[str]] = None,
) -> Tuple[QuantizationSetting, str]:
"""Quantize onnx model with optim_quant_method.
Args:
optim_quant_method (List[str]): support 'MixedPrecision_quantization', 'LayerwiseEqualization_quantization'
-'MixedPrecision_quantization': if some layers in model have larger errors in 8-bit quantization, dispathching
the layers to 16-bit quantization. You can remove or add layers according to your
needs.
-'LayerwiseEqualization_quantization': using weight equalization strategy, which is proposed by Markus Nagel.
Refer to paper https://openaccess.thecvf.com/content_ICCV_2019/papers/Nagel_Data-Free_Quantization_Through_Weight_Equalization_and_Bias_Correction_ICCV_2019_paper.pdf for more information.
Since ReLU6 exists in MobilenetV2, convert ReLU6 to ReLU for better precision.
Returns:
[tuple]: [QuantizationSetting, str]
"""
quant_setting = QuantizationSettingFactory.espdl_setting()
if optim_quant_method is not None:
if "MixedPrecision_quantization" in optim_quant_method:
# These layers have larger errors in 8-bit quantization, dispatching to 16-bit quantization.
# You can remove or add layers according to your needs.
quant_setting.dispatching_table.append(
"/features/features.1/conv/conv.0/conv.0.0/Conv",
get_target_platform(TARGET, 16),
)
quant_setting.dispatching_table.append(
"/features/features.1/conv/conv.0/conv.0.2/Clip",
get_target_platform(TARGET, 16),
)
elif "LayerwiseEqualization_quantization" in optim_quant_method:
# layerwise equalization
quant_setting.equalization = True
quant_setting.equalization_setting.iterations = 4
quant_setting.equalization_setting.value_threshold = 0.4
quant_setting.equalization_setting.opt_level = 2
quant_setting.equalization_setting.interested_layers = None
            # Point at the model variant whose ReLU6 activations have been
            # converted to ReLU (see the convert_relu6_to_relu sketch below).
            onnx_path = onnx_path.replace("mobilenet_v2.onnx", "mobilenet_v2_relu.onnx")
else:
raise ValueError(
"Please set optim_quant_method correctly. Support 'MixedPrecision_quantization', 'LayerwiseEqualization_quantization'"
)
return quant_setting, onnx_path
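

def find_high_error_layers(graph, dataloader, collate_fn, device: str):
    """Hedged sketch, not part of the original flow: locate the layers worth
    dispatching to 16-bit, assuming PPQ's layerwise error analysis keeps its
    documented signature. The layer names hard-coded in the
    MixedPrecision_quantization branch above can be discovered this way."""
    from ppq.quantization.analyse import layerwise_error_analyse

    # Returns a per-operation error report; the operations with the largest
    # errors are candidates for get_target_platform(TARGET, 16) dispatching.
    return layerwise_error_analyse(
        graph=graph,
        running_device=device,
        dataloader=dataloader,
        collate_fn=collate_fn,
    )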


def collate_fn1(batch: Tuple) -> torch.Tensor:
    """Stack (image, label) samples from ImageFolder into one image batch."""
    return torch.cat([sample[0].unsqueeze(0) for sample in batch], dim=0)


def collate_fn2(batch: torch.Tensor) -> torch.Tensor:
    """Move an already-collated batch onto the target device."""
    return batch.to(DEVICE)


def report_hook(blocknum, blocksize, total):
    """Print download progress for urllib.request.urlretrieve."""
    if total <= 0:  # some servers do not report Content-Length
        return
    percent = min(blocknum * blocksize / total * 100, 100)
    print(f"\rDownloading calibration dataset: {percent:.2f}%", end="")
if __name__ == "__main__":
    BATCH_SIZE = 32
    INPUT_SHAPE = [3, 224, 224]
    DEVICE = "cpu"  # 'cuda' or 'cpu'; if you choose 'cuda', make sure CUDA is available
    TARGET = "esp32p4"
    NUM_OF_BITS = 8
    ONNX_PATH = "./models/torch/mobilenet_v2.onnx"  # or 'models/onnx/mobilenet_v2.onnx'
    ESPDL_MODEL_PATH = "models/onnx/mobilenet_v2.espdl"
    CALIB_DIR = "./imagenet"
    # Download the calibration dataset (an ImageNet subset) from Espressif
imagenet_url = "https://dl.espressif.com/public/imagenet_calib.zip"
os.makedirs(CALIB_DIR, exist_ok=True)
if not os.path.exists("imagenet_calib.zip"):
urllib.request.urlretrieve(
imagenet_url, "imagenet_calib.zip", reporthook=report_hook
)
if not os.path.exists(os.path.join(CALIB_DIR, "calib")):
with zipfile.ZipFile("imagenet_calib.zip", "r") as zip_file:
zip_file.extractall(CALIB_DIR)
CALIB_DIR = os.path.join(CALIB_DIR, "calib")
    # -------------------------------------------
    # Prepare Calibration Dataset
    # -------------------------------------------
if os.path.exists(CALIB_DIR):
print(f"load imagenet calibration dataset from directory: {CALIB_DIR}")
dataset = datasets.ImageFolder(
CALIB_DIR,
transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
),
]
),
)
        dataset = Subset(dataset, indices=list(range(1024)))  # first 1024 images for calibration
dataloader = DataLoader(
dataset=dataset,
batch_size=BATCH_SIZE,
shuffle=False,
num_workers=4,
pin_memory=False,
collate_fn=collate_fn1,
)
else:
        # Fall back to random calibration data, for debugging only.
        print("load random calibration dataset")

        def load_random_calibration_dataset() -> Iterable:
            return [torch.rand(size=INPUT_SHAPE) for _ in range(BATCH_SIZE)]

        # Wrap the random samples in a DataLoader to build the calibration dataloader.
dataloader = DataLoader(
dataset=load_random_calibration_dataset(),
batch_size=BATCH_SIZE,
shuffle=False,
)
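
    # Optional sanity check (a hedged addition, not in the original script):
    # confirm that one collated calibration batch has the expected shape.
    # batch = collate_fn2(next(iter(dataloader)))
    # assert list(batch.shape)[1:] == INPUT_SHAPE
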
    # -------------------------------------------
    # Quantize ONNX Model.
    # -------------------------------------------
    # Create a setting for quantizing your network with ESP-DL.
    # If you do not need these optimizations, pass None as the second argument
    # of quant_setting_mobilenet_v2.
    # Example: using LayerwiseEqualization_quantization.
quant_setting, ONNX_PATH = quant_setting_mobilenet_v2(
ONNX_PATH, ["LayerwiseEqualization_quantization"]
)
quant_ppq_graph = espdl_quantize_onnx(
onnx_import_file=ONNX_PATH,
        espdl_export_file=ESPDL_MODEL_PATH,
calib_dataloader=dataloader,
calib_steps=32,
input_shape=[1] + INPUT_SHAPE,
target=TARGET,
num_of_bits=NUM_OF_BITS,
collate_fn=collate_fn2,
setting=quant_setting,
device=DEVICE,
error_report=True,
skip_export=False,
export_test_values=False,
verbose=1,
)
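
    # Hedged spot-check (an addition, not part of the original script): the
    # returned PPQ graph can be executed directly with PPQ's TorchExecutor,
    # assuming its usual constructor and forward() signature:
    # from ppq.executor import TorchExecutor
    # executor = TorchExecutor(graph=quant_ppq_graph, device=DEVICE)
    # outputs = executor.forward(torch.rand([1] + INPUT_SHAPE).to(DEVICE))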
    # -------------------------------------------
    # Evaluate Quantized Model.
    # (the calibration subset doubles as the evaluation set here)
    # -------------------------------------------
evaluate_ppq_module_with_imagenet(
model=quant_ppq_graph,
imagenet_validation_dir=CALIB_DIR,
batchsize=BATCH_SIZE,
device=DEVICE,
verbose=1,
)