Skip to content

Commit

Permalink
Merge from CTuning (#1038)
Browse files Browse the repository at this point in the history
  • Loading branch information
gfursin authored Dec 13, 2023
2 parents fc08ed0 + c7a18ff commit ce981c3
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 44 deletions.
8 changes: 5 additions & 3 deletions cm-mlops/script/app-mlperf-inference/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def postprocess(i):

if env.get('CM_MLPERF_USER_CONF', '') == '':
return {'return': 0}

output_dir = env['CM_MLPERF_OUTPUT_DIR']
mode = env['CM_MLPERF_LOADGEN_MODE']

Expand Down Expand Up @@ -61,7 +62,8 @@ def postprocess(i):
model = env['CM_MODEL']
model_full_name = env.get('CM_ML_MODEL_FULL_NAME', model)

if model == "resnet50":
if mode == "accuracy":
if model == "resnet50":
accuracy_filename = "accuracy-imagenet.py"
accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", \
accuracy_filename)
Expand All @@ -70,15 +72,15 @@ def postprocess(i):
accuracy_log_file_option_name = " --mlperf-accuracy-file "
datatype_option = " --dtype "+env['CM_IMAGENET_ACCURACY_DTYPE']

elif model == "retinanet":
elif model == "retinanet":
accuracy_filename = "accuracy-openimages.py"
accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", \
accuracy_filename)
dataset_args = " --openimages-dir " + env['CM_DATASET_PATH']
accuracy_log_file_option_name = " --mlperf-accuracy-file "
datatype_option = ""

elif 'bert' in model:
elif 'bert' in model:
accuracy_filename = "accuracy-squad.py"
accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_BERT_PATH'], accuracy_filename)
dataset_args = " --val_data '" + env['CM_DATASET_SQUAD_VAL_PATH'] + "' --vocab_file '" + env['CM_DATASET_SQUAD_VOCAB_PATH'] + "' --out_file predictions.json "
Expand Down
21 changes: 9 additions & 12 deletions cm-mlops/script/calibrate-model-for.qaic/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,17 @@
"tags": "get,dataset,imagenet,preprocessed,_calibration,_for.resnet50"
},
{
"enable_if_env":
{
"CM_CALIBRATE_SQUAD": [
"on"
]
},
"names": [
"squad-cal",
"preprocessed-dataset"
],
"tags": "get,dataset,calibration,squad,_pickle,_width.384"
"tags": "get,dataset,preprocessed,_calib1,squad,_pickle,_seq-length.384,_packed"
},
{
"names": [
Expand Down Expand Up @@ -172,22 +178,13 @@
},
"adr": {
"model-src": {
"tags": "retinanet,_no-nms"
"tags": "bert-large,_onnx"
}
},
"deps": [
{
"names": [
"squad-preprocessed",
"preprocessed-dataset"
],
"tags": "get,preprocessed,dataset,squad,_packed,_pickle"
}
],
"env": {
"CM_CALIBRATE_SQUAD": "yes",
"CM_QAIC_COMPILER_ARGS": "",
"CM_QAIC_COMPILER_PARAMS": "-onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,<<<CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH>>> -input-list-file=<<<CM_DATASET_SQUAD_TOKENIZED_PACKED_FILENAMES_FILE>>> -num-histogram-bins=512 -profiling-threads=4",
"CM_QAIC_COMPILER_PARAMS": "-onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,<<<CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH>>> -input-list-file=<<<CM_DATASET_SQUAD_TOKENIZED_PACKED_FILENAMES_FILE>>> -num-histogram-bins=512 -profiling-threads=96",
"CM_QAIC_MODEL_TO_CONVERT": "calibrate_bert_mlperf"
},
"seq.#": {
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/calibrate-model-for.qaic/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def create_batched_inputs(env):

def construct_calibration_cmd(env):
compiler_params = env['CM_QAIC_COMPILER_PARAMS']
batchsize = env['CM_QAIC_MODEL_BATCH_SIZE']
batchsize = env.get('CM_QAIC_MODEL_BATCH_SIZE', "1")
cmd = env['CM_QAIC_EXEC_PATH'] + " "
if env.get('CM_CREATE_INPUT_BATCH', '') == 'yes':
cmd += " -input-list-file=batched_input_files -batchsize="+batchsize + " "
Expand Down
44 changes: 38 additions & 6 deletions cm-mlops/script/compile-model-for.qaic/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
"variations": {
"bs.1": {
"group": "batch-size",
"default": true,
"env": {
"CM_QAIC_MODEL_BATCH_SIZE": "1"
},
Expand Down Expand Up @@ -148,7 +147,7 @@
},
"resnet50,server,nsp.14": {
"env": {
"CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=48 -ols=4"
"CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=4 -ols=4"
},
"default_variations": {
"batch-size": "bs.8"
Expand Down Expand Up @@ -186,6 +185,30 @@
"CM_QAIC_MODEL_COMPILER_ARGS": "-sdp-cluster-sizes=4,4 -mos=1,4"
}
},
"bert-99,offline": {
"env": {
}
},
"bert-99,offline,nsp.14": {
"env": {
"CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=1 -mos=1 -ols=3"
}
},
"bert-99,server,nsp.14": {
"env": {
"CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=1 -mos=1 -ols=3"
}
},
"bert-99,multistream,nsp.14": {
"env": {
"CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=4"
}
},
"bert-99,singlestream,nsp.14": {
"env": {
"CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=8"
}
},
"tf": {
"group": "model-framework"
},
Expand Down Expand Up @@ -217,15 +240,24 @@
"bert-99": {
"adr": {
"model-src": {
"tags": "bert-99,_onnx"
"tags": "bert-large,_onnx"
}
},
"env": {
"CM_COMPILE_BERT": "on",
"CM_QAIC_MODEL_TO_CONVERT": "calibrate_bert_mlperf",
"CM_QAIC_MODEL_COMPILER_ARGS": "-aic-hw -aic-hw-version=2.0 -execute-nodes-in-fp16=Mul,Sqrt,Div,Add,ReduceMean,Softmax,Sub,Gather,Erf,Pow,Concat,Tile,LayerNormalization -quantization-schema=symmetric_with_uint8 -quantization-precision=Int8 -quantization-precision-bias=Int32 -vvv -compile-only -onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,[SEG] -multicast-weights",
"CM_QAIC_MODEL_COMPILER_PARAMS": "-enable-channelwise -profiling-threads=32 -onnx-define-symbol=batch_size,[BATCH_SIZE] -node-precision-info=[NODE_PRECISION_FILE]"
}
"CM_QAIC_MODEL_COMPILER_ARGS": "-aic-hw -aic-hw-version=2.0 -execute-nodes-in-fp16=Mul,Sqrt,Div,Add,ReduceMean,Softmax,Sub,Gather,Erf,Pow,Concat,Tile,LayerNormalization -quantization-schema=symmetric_with_uint8 -quantization-precision=Int8 -quantization-precision-bias=Int32 -vvv -compile-only -onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,384 -multicast-weights",
"CM_QAIC_MODEL_COMPILER_PARAMS_BASE": ""
},
"deps": [
{
"tags": "calibrate,qaic,_bert-99",
"names": [
"bert-profile",
"qaic-profile"
]
}
]
}
}
}
5 changes: 3 additions & 2 deletions cm-mlops/script/compile-model-for.qaic/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,16 @@ def preprocess(i):
def construct_compilation_cmd(env):
compiler_params_base = env['CM_QAIC_MODEL_COMPILER_PARAMS_BASE']
compiler_args = env['CM_QAIC_MODEL_COMPILER_ARGS'] + ' ' + env.get('CM_QAIC_MODEL_COMPILER_ARGS_SUT', '')
batchsize = env['CM_QAIC_MODEL_BATCH_SIZE']
batchsize = env.get('CM_QAIC_MODEL_BATCH_SIZE')

if env.get('CM_QAIC_MODEL_QUANTIZATION', '') == 'yes':
profile_string = " -load-profile=" + env['CM_QAIC_MODEL_PROFILE_WITH_PATH']
else:
profile_string = ''

compiler_params = compiler_params_base + ' ' + compiler_args
compiler_params += " -batchsize="+batchsize
if batchsize:
compiler_params += " -batchsize="+batchsize

aic_binary_dir = os.path.join(os.getcwd(), "elfs")

Expand Down
12 changes: 7 additions & 5 deletions cm-mlops/script/get-preprocessed-dataset-squad/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ variations:
calib1:
group: calibration-set
env:
CM_SQUAD_CALIBRATION_SET: one
CM_DATASET_SQUAD_CALIBRATION_SET: one
calib2:
group: calibration-set
env:
CM_SQUAD_CALIBRATION_SET: two
CM_DATASET_SQUAD_CALIBRATION_SET: two
no-calib:
group: calibration-set
default: true
env:
CM_SQUAD_CALIBRATION_SET: ''
CM_DATASET_SQUAD_CALIBRATION_SET: ''
raw:
group: raw
default: true
Expand Down Expand Up @@ -85,8 +85,10 @@ variations:
CM_DATASET_SQUAD_PACKED: 'yes'
deps:
- tags: get,preprocessed,squad,_pickle
inherit_varation_tags: true
skipa_inherit_variation_groups:
env:
CM_DATASET_SQUAD_PACKED: ''
inherit_variation_tags: true
skip_inherit_variation_groups:
- packing

versions: {}
8 changes: 4 additions & 4 deletions cm-mlops/script/get-preprocessed-dataset-squad/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ def preprocess(i):

quiet = (env.get('CM_QUIET', False) == 'yes')

if env.get('CM_SQUAD_CALIBRATION_SET', '') == "one":
if env.get('CM_DATASET_SQUAD_CALIBRATION_SET', '') == "one":
env['DATASET_CALIBRATION_FILE'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'calibration', 'SQuAD-v1.1', 'bert_calibration_features.txt')
env['DATASET_CALIBRATION_ID'] = 1
elif env.get('CM_SQUAD_CALIBRATION_SET', '') == "two":
elif env.get('CM_DATASET_SQUAD_CALIBRATION_SET', '') == "two":
env['DATASET_CALIBRATION_FILE'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'calibration', 'SQuAD-v1.1', 'bert_calibration_qas_ids.txt')
env['DATASET_CALIBRATION_ID'] = 2
else:
Expand Down Expand Up @@ -55,8 +55,8 @@ def postprocess(i):
else:
with open("packed_filenames.txt", "w") as f:
for dirname in os.listdir(cur):
if os.path.isdir(dirname):
f.write(os.path.join(cur, "input_ids.raw") + ", " + os.path.join(cur, "segment_ids.raw") + ", " + os.path.join(cur, "input_position_ids.raw")+ "\n")
if os.path.isdir(dirname) and not dirname.startswith("_"):
f.write(os.path.join(cur, dirname, "input_ids.raw") + "," + os.path.join(cur, dirname, "segment_ids.raw") + "," + os.path.join(cur, dirname, "input_position_ids.raw")+ "\n")
env['CM_DATASET_SQUAD_TOKENIZED_PACKED_FILENAMES_FILE'] = os.path.join(cur, "packed_filenames.txt")

return {'return':0}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# MLPerf Inference Benchmarking on AWS dl2q.24xlarge instance using 8 QAIC Cloud AI 100

The `dl2q.24xlarge` instance is available in `us-west-2d` and has 96 vCPUs and 768 GB of memory.

The [Deep Learning Base Qualcomm AMI (Amazon Linux 2)](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#Images:visibility=public-images;imageId=ami-0287712deef96ecc6) image is the recommended OS image, as it comes with the QAIC SDKs (both Apps and Platform) preinstalled.


## System setup
```
yum install -y python3-devel git
python3 -m pip install cmind
cm pull repo mlcommons@ck
```

## ResNet50

Do a performance run for the Offline scenario

```
cm run script --tags=generate-run-cmds,inference,_performance-only --device=qaic --backend=glow \
--scenario=Offline --implementation=kilt --model=resnet50 \
--test_query_count=40000 --precision=fp32 --rerun
```

* `--adr.mlperf-inference-implementation.device_ids=0` can be used to run the inference only on the first QAIC device.
* `--precision=uint8` is the best option to use, but unfortunately it does not work with the default platform SDK. When we use `--precision=fp32`, the float32 inputs are converted on the fly to uint8 format by the QAIC driver. This overhead and the 4x memory bandwidth usage reduce the Offline scenario performance by nearly 50%. We got `~9000` QPS for a single-device run.

*WIP*
36 changes: 29 additions & 7 deletions cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ new_env_keys:
- CM_ML_MODEL_*
- CM_MAX_EXAMPLES
- CM_IMAGENET_ACCURACY_DTYPE
- CM_SQUAD_ACCURACY_DTYPE


# Dependencies on other CM scripts
Expand Down Expand Up @@ -152,7 +153,7 @@ deps:
- bert-99.9
names:
- squad-tokenized
tags: get,dataset,tokenized,squad
tags: get,dataset,tokenized,squad,_raw

########################################################################
# Install OpenImages
Expand Down Expand Up @@ -343,14 +344,19 @@ variations:
CM_BENCHMARK: STANDALONE_BERT
kilt_model_name: bert
kilt_model_seq_length: 384
kilt_model_batch_size: 384
kilt_model_batch_size: 1
kilt_model_bert_variant: BERT_PACKED
kilt_input_format: "INT64,1,384:INT64,1,8:INT64,1,384:INT64,1,384"
kilt_output_format: "FLOAT32,1,384:FLOAT32,1,384"
dataset_squad_tokenized_max_seq_length: 384
loadgen_buffer_size: 10833
loadgen_dataset_size: 10833

bert_,qaic:
env:
kilt_input_format: "UINT32,1,384:UINT32,1,8:UINT32,1,384:UINT32,1,384"
kilt_device_qaic_skip_stage: convert

standalone:
group: run-mode
default: true
Expand Down Expand Up @@ -384,6 +390,7 @@ variations:
- bert_
env:
CM_MODEL: bert-99
CM_SQUAD_ACCURACY_DTYPE: float32
CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/3750364/files/bert_large_v1_1_fake_quant.onnx"

bert-99.9:
Expand Down Expand Up @@ -422,19 +429,31 @@ variations:
env:
CM_MLPERF_LOADGEN_SCENARIO: SingleStream
kilt_model_batch_size: 1
adr:
qaic-model-compiler:
tags: _singlestream

multistream:
group: loadgen-scenario
env:
CM_MLPERF_LOADGEN_SCENARIO: MultiStream
adr:
qaic-model-compiler:
tags: _multistream
offline:
group: loadgen-scenario
env:
CM_MLPERF_LOADGEN_SCENARIO: Offline
adr:
qaic-model-compiler:
tags: _offline
server:
group: loadgen-scenario
env:
CM_MLPERF_LOADGEN_SCENARIO: Server
adr:
qaic-model-compiler:
tags: _server

uint8:
group: precision
Expand All @@ -461,15 +480,18 @@ variations:
base:
- nsp.14
env:
kilt_device_ids: 0,1,2,3,4,5,6,7
kilt_device_ids: "0"
qaic_queue_length: 6

dl2q.24xlarge,singlestream:
env:
kilt_device_ids: 0
base:
- activation-count.1
qaic_activation_count: "1"

dl2q.24xlarge,resnet50,offline:
base:
- activation-count.3
env:
qaic_activation_count: "3"

dl2q.24xlarge,bert-99,offline:
env:
qaic_activation_count: "14"
8 changes: 4 additions & 4 deletions cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def preprocess(i):
elif "bert" in env.get('CM_MODEL'):
env['dataset_squad_tokenized_max_seq_length'] = env['CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH']
env['dataset_squad_tokenized_root'] = env['CM_DATASET_SQUAD_TOKENIZED_ROOT']
env['dataset_squad_tokenized_input_ids'] = env['CM_DATASET_SQUAD_TOKENIZED_INPUT_IDS']
env['dataset_squad_tokenized_input_mask'] = env['CM_DATASET_SQUAD_TOKENIZED_INPUT_MASK']
env['dataset_squad_tokenized_segment_ids'] = env['CM_DATASET_SQUAD_TOKENIZED_SEGMENT_IDS']
env['dataset_squad_tokenized_input_ids'] = os.path.basename(env['CM_DATASET_SQUAD_TOKENIZED_INPUT_IDS'])
env['dataset_squad_tokenized_input_mask'] = os.path.basename(env['CM_DATASET_SQUAD_TOKENIZED_INPUT_MASK'])
env['dataset_squad_tokenized_segment_ids'] = os.path.basename(env['CM_DATASET_SQUAD_TOKENIZED_SEGMENT_IDS'])

if env.get('CM_BENCHMARK', '') == 'NETWORK_BERT_SERVER':
source_files.append(os.path.join(kilt_root, "benchmarks", "network", "bert", "server", "pack.cpp"))
Expand Down Expand Up @@ -96,7 +96,7 @@ def preprocess(i):
env['+ CXXFLAGS'].append("-DKILT_DEVICE_" + env['device'].upper())

# add preprocessor flag like "#define CM_MODEL_RESNET50"
env['+ CXXFLAGS'].append('-DCM_MODEL_' + env['CM_MODEL'].upper())
#env['+ CXXFLAGS'].append('-DCM_MODEL_' + env['CM_MODEL'].upper())
# add preprocessor flag like "#define CM_MLPERF_BACKEND_ONNXRUNTIME"
env['+ CXXFLAGS'].append('-DCM_MLPERF_BACKEND_' + env['CM_MLPERF_BACKEND'].upper())
# add preprocessor flag like "#define CM_MLPERF_DEVICE_CPU"
Expand Down

0 comments on commit ce981c3

Please sign in to comment.