From 42a9919e19c00466170ff5d8133e6a03092af3c3 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sat, 9 Dec 2023 13:33:35 +0000 Subject: [PATCH 1/5] Fixes for bert-qaic profile generation --- cm-mlops/script/calibrate-model-for.qaic/_cm.json | 15 +++------------ .../script/calibrate-model-for.qaic/customize.py | 2 +- .../get-preprocessed-dataset-squad/_cm.yaml | 12 +++++++----- .../get-preprocessed-dataset-squad/customize.py | 8 ++++---- 4 files changed, 15 insertions(+), 22 deletions(-) diff --git a/cm-mlops/script/calibrate-model-for.qaic/_cm.json b/cm-mlops/script/calibrate-model-for.qaic/_cm.json index 145bc4444e..9406504c98 100644 --- a/cm-mlops/script/calibrate-model-for.qaic/_cm.json +++ b/cm-mlops/script/calibrate-model-for.qaic/_cm.json @@ -59,7 +59,7 @@ "squad-cal", "preprocessed-dataset" ], - "tags": "get,dataset,calibration,squad,_pickle,_width.384" + "tags": "get,dataset,preprocessed,_calib1,squad,_pickle,_seq-length.384,_packed" }, { "names": [ @@ -172,22 +172,13 @@ }, "adr": { "model-src": { - "tags": "retinanet,_no-nms" + "tags": "bert-large,_onnx" } }, - "deps": [ - { - "names": [ - "squad-preprocessed", - "preprocessed-dataset" - ], - "tags": "get,preprocessed,dataset,squad,_packed,_pickle" - } - ], "env": { "CM_CALIBRATE_SQUAD": "yes", "CM_QAIC_COMPILER_ARGS": "", - "CM_QAIC_COMPILER_PARAMS": "-onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,<<>> -input-list-file=<<>> -num-histogram-bins=512 -profiling-threads=4", + "CM_QAIC_COMPILER_PARAMS": "-onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,<<>> -input-list-file=<<>> -num-histogram-bins=512 -profiling-threads=96", "CM_QAIC_MODEL_TO_CONVERT": "calibrate_bert_mlperf" }, "seq.#": { diff --git a/cm-mlops/script/calibrate-model-for.qaic/customize.py b/cm-mlops/script/calibrate-model-for.qaic/customize.py index a2445770b6..cb18a5b9e8 100644 --- a/cm-mlops/script/calibrate-model-for.qaic/customize.py +++ b/cm-mlops/script/calibrate-model-for.qaic/customize.py @@ -67,7 +67,7 @@ def 
create_batched_inputs(env): def construct_calibration_cmd(env): compiler_params = env['CM_QAIC_COMPILER_PARAMS'] - batchsize = env['CM_QAIC_MODEL_BATCH_SIZE'] + batchsize = env.get('CM_QAIC_MODEL_BATCH_SIZE', "1") cmd = env['CM_QAIC_EXEC_PATH'] + " " if env.get('CM_CREATE_INPUT_BATCH', '') == 'yes': cmd += " -input-list-file=batched_input_files -batchsize="+batchsize + " " diff --git a/cm-mlops/script/get-preprocessed-dataset-squad/_cm.yaml b/cm-mlops/script/get-preprocessed-dataset-squad/_cm.yaml index 0fa40e784f..191e885b73 100644 --- a/cm-mlops/script/get-preprocessed-dataset-squad/_cm.yaml +++ b/cm-mlops/script/get-preprocessed-dataset-squad/_cm.yaml @@ -42,16 +42,16 @@ variations: calib1: group: calibration-set env: - CM_SQUAD_CALIBRATION_SET: one + CM_DATASET_SQUAD_CALIBRATION_SET: one calib2: group: calibration-set env: - CM_SQUAD_CALIBRATION_SET: two + CM_DATASET_SQUAD_CALIBRATION_SET: two no-calib: group: calibration-set default: true env: - CM_SQUAD_CALIBRATION_SET: '' + CM_DATASET_SQUAD_CALIBRATION_SET: '' raw: group: raw default: true @@ -85,8 +85,10 @@ variations: CM_DATASET_SQUAD_PACKED: 'yes' deps: - tags: get,preprocessed,squad,_pickle - inherit_varation_tags: true - skipa_inherit_variation_groups: + env: + CM_DATASET_SQUAD_PACKED: '' + inherit_variation_tags: true + skip_inherit_variation_groups: - packing versions: {} diff --git a/cm-mlops/script/get-preprocessed-dataset-squad/customize.py b/cm-mlops/script/get-preprocessed-dataset-squad/customize.py index 8a6336dc9f..23044480a1 100644 --- a/cm-mlops/script/get-preprocessed-dataset-squad/customize.py +++ b/cm-mlops/script/get-preprocessed-dataset-squad/customize.py @@ -13,10 +13,10 @@ def preprocess(i): quiet = (env.get('CM_QUIET', False) == 'yes') - if env.get('CM_SQUAD_CALIBRATION_SET', '') == "one": + if env.get('CM_DATASET_SQUAD_CALIBRATION_SET', '') == "one": env['DATASET_CALIBRATION_FILE'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'calibration', 'SQuAD-v1.1', 
'bert_calibration_features.txt') env['DATASET_CALIBRATION_ID'] = 1 - elif env.get('CM_SQUAD_CALIBRATION_SET', '') == "two": + elif env.get('CM_DATASET_SQUAD_CALIBRATION_SET', '') == "two": env['DATASET_CALIBRATION_FILE'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'calibration', 'SQuAD-v1.1', 'bert_calibration_qas_ids.txt') env['DATASET_CALIBRATION_ID'] = 2 else: @@ -55,8 +55,8 @@ def postprocess(i): else: with open("packed_filenames.txt", "w") as f: for dirname in os.listdir(cur): - if os.path.isdir(dirname): - f.write(os.path.join(cur, "input_ids.raw") + ", " + os.path.join(cur, "segment_ids.raw") + ", " + os.path.join(cur, "input_position_ids.raw")+ "\n") + if os.path.isdir(dirname) and not dirname.startswith("_"): + f.write(os.path.join(cur, dirname, "input_ids.raw") + "," + os.path.join(cur, dirname, "segment_ids.raw") + "," + os.path.join(cur, dirname, "input_position_ids.raw")+ "\n") env['CM_DATASET_SQUAD_TOKENIZED_PACKED_FILENAMES_FILE'] = os.path.join(cur, "packed_filenames.txt") return {'return':0} From 1ed44739505c7cfe2b816764edc0a7fb79544380 Mon Sep 17 00:00:00 2001 From: Arjun Date: Sat, 9 Dec 2023 15:18:24 +0000 Subject: [PATCH 2/5] Fixes for bert-qaic compilation --- .../script/app-mlperf-inference/customize.py | 8 +++++--- .../script/compile-model-for.qaic/_cm.json | 18 +++++++++++++----- .../script/compile-model-for.qaic/customize.py | 5 +++-- .../reproduce-mlperf-inference-kilt/_cm.yaml | 10 ++++++++-- .../customize.py | 6 +++--- 5 files changed, 32 insertions(+), 15 deletions(-) diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index aed25e2f0c..4ab6bf2f31 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -32,6 +32,7 @@ def postprocess(i): if env.get('CM_MLPERF_USER_CONF', '') == '': return {'return': 0} + output_dir = env['CM_MLPERF_OUTPUT_DIR'] mode = env['CM_MLPERF_LOADGEN_MODE'] @@ -61,7 +62,8 @@ def 
postprocess(i): model = env['CM_MODEL'] model_full_name = env.get('CM_ML_MODEL_FULL_NAME', model) - if model == "resnet50": + if mode ==accuracy: + if model == "resnet50": accuracy_filename = "accuracy-imagenet.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", \ accuracy_filename) @@ -70,7 +72,7 @@ def postprocess(i): accuracy_log_file_option_name = " --mlperf-accuracy-file " datatype_option = " --dtype "+env['CM_IMAGENET_ACCURACY_DTYPE'] - elif model == "retinanet": + elif model == "retinanet": accuracy_filename = "accuracy-openimages.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", \ accuracy_filename) @@ -78,7 +80,7 @@ def postprocess(i): accuracy_log_file_option_name = " --mlperf-accuracy-file " datatype_option = "" - elif 'bert' in model: + elif 'bert' in model: accuracy_filename = "accuracy-squad.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_BERT_PATH'], accuracy_filename) dataset_args = " --val_data '" + env['CM_DATASET_SQUAD_VAL_PATH'] + "' --vocab_file '" + env['CM_DATASET_SQUAD_VOCAB_PATH'] + "' --out_file predictions.json " diff --git a/cm-mlops/script/compile-model-for.qaic/_cm.json b/cm-mlops/script/compile-model-for.qaic/_cm.json index dc817c2615..995b8487b9 100644 --- a/cm-mlops/script/compile-model-for.qaic/_cm.json +++ b/cm-mlops/script/compile-model-for.qaic/_cm.json @@ -59,7 +59,6 @@ "variations": { "bs.1": { "group": "batch-size", - "default": true, "env": { "CM_QAIC_MODEL_BATCH_SIZE": "1" }, @@ -217,15 +216,24 @@ "bert-99": { "adr": { "model-src": { - "tags": "bert-99,_onnx" + "tags": "bert-large,_onnx" } }, "env": { "CM_COMPILE_BERT": "on", "CM_QAIC_MODEL_TO_CONVERT": "calibrate_bert_mlperf", - "CM_QAIC_MODEL_COMPILER_ARGS": "-aic-hw -aic-hw-version=2.0 -execute-nodes-in-fp16=Mul,Sqrt,Div,Add,ReduceMean,Softmax,Sub,Gather,Erf,Pow,Concat,Tile,LayerNormalization -quantization-schema=symmetric_with_uint8 
-quantization-precision=Int8 -quantization-precision-bias=Int32 -vvv -compile-only -onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,[SEG] -multicast-weights", - "CM_QAIC_MODEL_COMPILER_PARAMS": "-enable-channelwise -profiling-threads=32 -onnx-define-symbol=batch_size,[BATCH_SIZE] -node-precision-info=[NODE_PRECISION_FILE]" - } + "CM_QAIC_MODEL_COMPILER_ARGS": "-aic-hw -aic-hw-version=2.0 -execute-nodes-in-fp16=Mul,Sqrt,Div,Add,ReduceMean,Softmax,Sub,Gather,Erf,Pow,Concat,Tile,LayerNormalization -quantization-schema=symmetric_with_uint8 -quantization-precision=Int8 -quantization-precision-bias=Int32 -vvv -compile-only -onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,384 -multicast-weights", + "CM_QAIC_MODEL_COMPILER_PARAMS_BASE": "" + }, + "deps": [ + { + "tags": "calibrate,qaic,_bert-99", + "names": [ + "bert-profile", + "qaic-profile" + ] + } + ] } } } diff --git a/cm-mlops/script/compile-model-for.qaic/customize.py b/cm-mlops/script/compile-model-for.qaic/customize.py index 59c6fc923d..6791702fa7 100644 --- a/cm-mlops/script/compile-model-for.qaic/customize.py +++ b/cm-mlops/script/compile-model-for.qaic/customize.py @@ -27,7 +27,7 @@ def preprocess(i): def construct_compilation_cmd(env): compiler_params_base = env['CM_QAIC_MODEL_COMPILER_PARAMS_BASE'] compiler_args = env['CM_QAIC_MODEL_COMPILER_ARGS'] + ' ' + env.get('CM_QAIC_MODEL_COMPILER_ARGS_SUT', '') - batchsize = env['CM_QAIC_MODEL_BATCH_SIZE'] + batchsize = env.get('CM_QAIC_MODEL_BATCH_SIZE') if env.get('CM_QAIC_MODEL_QUANTIZATION', '') == 'yes': profile_string = " -load-profile=" + env['CM_QAIC_MODEL_PROFILE_WITH_PATH'] @@ -35,7 +35,8 @@ def construct_compilation_cmd(env): profile_string = '' compiler_params = compiler_params_base + ' ' + compiler_args - compiler_params += " -batchsize="+batchsize + if batchsize: + compiler_params += " -batchsize="+batchsize aic_binary_dir = os.path.join(os.getcwd(), "elfs") diff --git 
a/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml index cf39d9f557..5391434789 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml @@ -74,6 +74,7 @@ new_env_keys: - CM_ML_MODEL_* - CM_MAX_EXAMPLES - CM_IMAGENET_ACCURACY_DTYPE + - CM_SQUAD_ACCURACY_DTYPE # Dependencies on other CM scripts @@ -152,7 +153,7 @@ deps: - bert-99.9 names: - squad-tokenized - tags: get,dataset,tokenized,squad + tags: get,dataset,tokenized,squad,_raw ######################################################################## # Install OpenImages @@ -343,7 +344,7 @@ variations: CM_BENCHMARK: STANDALONE_BERT kilt_model_name: bert kilt_model_seq_length: 384 - kilt_model_batch_size: 384 + kilt_model_batch_size: 1 kilt_model_bert_variant: BERT_PACKED kilt_input_format: "INT64,1,384:INT64,1,8:INT64,1,384:INT64,1,384" kilt_output_format: "FLOAT32,1,384:FLOAT32,1,384" @@ -351,6 +352,11 @@ variations: loadgen_buffer_size: 10833 loadgen_dataset_size: 10833 + bert_,qaic: + env: + kilt_input_format: "UINT32,1,384:UINT32,1,8:UINT32,1,384:UINT32,1,384" + kilt_device_qaic_skip_stage: convert + standalone: group: run-mode default: true diff --git a/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py b/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py index d03ba0856c..84169b3cb3 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py @@ -42,9 +42,9 @@ def preprocess(i): elif "bert" in env.get('CM_MODEL'): env['dataset_squad_tokenized_max_seq_length'] = env['CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH'] env['dataset_squad_tokenized_root'] = env['CM_DATASET_SQUAD_TOKENIZED_ROOT'] - env['dataset_squad_tokenized_input_ids'] = env['CM_DATASET_SQUAD_TOKENIZED_INPUT_IDS'] - env['dataset_squad_tokenized_input_mask'] = env['CM_DATASET_SQUAD_TOKENIZED_INPUT_MASK'] - 
env['dataset_squad_tokenized_segment_ids'] = env['CM_DATASET_SQUAD_TOKENIZED_SEGMENT_IDS'] + env['dataset_squad_tokenized_input_ids'] = os.path.basename(env['CM_DATASET_SQUAD_TOKENIZED_INPUT_IDS']) + env['dataset_squad_tokenized_input_mask'] = os.path.basename(env['CM_DATASET_SQUAD_TOKENIZED_INPUT_MASK']) + env['dataset_squad_tokenized_segment_ids'] = os.path.basename(env['CM_DATASET_SQUAD_TOKENIZED_SEGMENT_IDS']) if env.get('CM_BENCHMARK', '') == 'NETWORK_BERT_SERVER': source_files.append(os.path.join(kilt_root, "benchmarks", "network", "bert", "server", "pack.cpp")) From 2134492cfe3a8091106b2f9d4c18940b52d8ac6b Mon Sep 17 00:00:00 2001 From: Arjun Date: Sat, 9 Dec 2023 18:31:17 +0000 Subject: [PATCH 3/5] Fixes for bert qaic run --- .../script/app-mlperf-inference/customize.py | 2 +- .../script/calibrate-model-for.qaic/_cm.json | 6 +++++ .../script/compile-model-for.qaic/_cm.json | 26 ++++++++++++++++++- .../reproduce-mlperf-inference-kilt/_cm.yaml | 26 +++++++++++++++---- .../customize.py | 2 +- 5 files changed, 54 insertions(+), 8 deletions(-) diff --git a/cm-mlops/script/app-mlperf-inference/customize.py b/cm-mlops/script/app-mlperf-inference/customize.py index 4ab6bf2f31..8cc12b3f28 100644 --- a/cm-mlops/script/app-mlperf-inference/customize.py +++ b/cm-mlops/script/app-mlperf-inference/customize.py @@ -62,7 +62,7 @@ def postprocess(i): model = env['CM_MODEL'] model_full_name = env.get('CM_ML_MODEL_FULL_NAME', model) - if mode ==accuracy: + if mode == "accuracy": if model == "resnet50": accuracy_filename = "accuracy-imagenet.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", \ diff --git a/cm-mlops/script/calibrate-model-for.qaic/_cm.json b/cm-mlops/script/calibrate-model-for.qaic/_cm.json index 9406504c98..c8303e4f9c 100644 --- a/cm-mlops/script/calibrate-model-for.qaic/_cm.json +++ b/cm-mlops/script/calibrate-model-for.qaic/_cm.json @@ -55,6 +55,12 @@ "tags": 
"get,dataset,imagenet,preprocessed,_calibration,_for.resnet50" }, { + "enable_if_env": + { + "CM_CALIBRATE_SQUAD": [ + "on" + ] + }, "names": [ "squad-cal", "preprocessed-dataset" diff --git a/cm-mlops/script/compile-model-for.qaic/_cm.json b/cm-mlops/script/compile-model-for.qaic/_cm.json index 995b8487b9..fc285dd081 100644 --- a/cm-mlops/script/compile-model-for.qaic/_cm.json +++ b/cm-mlops/script/compile-model-for.qaic/_cm.json @@ -147,7 +147,7 @@ }, "resnet50,server,nsp.14": { "env": { - "CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=48 -ols=4" + "CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=4 -ols=4" }, "default_variations": { "batch-size": "bs.8" @@ -185,6 +185,30 @@ "CM_QAIC_MODEL_COMPILER_ARGS": "-sdp-cluster-sizes=4,4 -mos=1,4" } }, + "bert-99,offline": { + "env": { + } + }, + "bert-99,offline,nsp.14": { + "env": { + "CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=1 -mos=1 -ols=3" + } + }, + "bert-99,server,nsp.14": { + "env": { + "CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=1 -mos=1 -ols=3" + } + }, + "bert-99,multistream,nsp.14": { + "env": { + "CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=4" + } + }, + "bert-99,singlestream,nsp.14": { + "env": { + "CM_QAIC_MODEL_COMPILER_ARGS_SUT": "-aic-num-cores=8" + } + }, "tf": { "group": "model-framework" }, diff --git a/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml index 5391434789..8f1e288fd3 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml +++ b/cm-mlops/script/reproduce-mlperf-inference-kilt/_cm.yaml @@ -390,6 +390,7 @@ variations: - bert_ env: CM_MODEL: bert-99 + CM_SQUAD_ACCURACY_DTYPE: float32 CM_NOT_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://zenodo.org/record/3750364/files/bert_large_v1_1_fake_quant.onnx" bert-99.9: @@ -428,19 +429,31 @@ variations: env: CM_MLPERF_LOADGEN_SCENARIO: SingleStream kilt_model_batch_size: 1 + adr: + qaic-model-compiler: + tags: _singlestream multistream: group: 
loadgen-scenario env: CM_MLPERF_LOADGEN_SCENARIO: MultiStream + adr: + qaic-model-compiler: + tags: _multistream offline: group: loadgen-scenario env: CM_MLPERF_LOADGEN_SCENARIO: Offline + adr: + qaic-model-compiler: + tags: _offline server: group: loadgen-scenario env: CM_MLPERF_LOADGEN_SCENARIO: Server + adr: + qaic-model-compiler: + tags: _server uint8: group: precision @@ -467,15 +480,18 @@ variations: base: - nsp.14 env: - kilt_device_ids: 0,1,2,3,4,5,6,7 + kilt_device_ids: "0" qaic_queue_length: 6 dl2q.24xlarge,singlestream: env: kilt_device_ids: 0 - base: - - activation-count.1 + qaic_activation_count: "1" dl2q.24xlarge,resnet50,offline: - base: - - activation-count.3 + env: + qaic_activation_count: "3" + + dl2q.24xlarge,bert-99,offline: + env: + qaic_activation_count: "14" diff --git a/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py b/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py index 84169b3cb3..6f0af2607d 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py +++ b/cm-mlops/script/reproduce-mlperf-inference-kilt/customize.py @@ -96,7 +96,7 @@ def preprocess(i): env['+ CXXFLAGS'].append("-DKILT_DEVICE_" + env['device'].upper()) # add preprocessor flag like "#define CM_MODEL_RESNET50" - env['+ CXXFLAGS'].append('-DCM_MODEL_' + env['CM_MODEL'].upper()) + #env['+ CXXFLAGS'].append('-DCM_MODEL_' + env['CM_MODEL'].upper()) # add preprocessor flag like "#define CM_MLPERF_BACKEND_ONNXRUNTIME" env['+ CXXFLAGS'].append('-DCM_MLPERF_BACKEND_' + env['CM_MLPERF_BACKEND'].upper()) # add preprocessor flag like "#define CM_MLPERF_DEVICE_CPU" From a508dc12b059e6215fc8a0852aa59135adcb3874 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 12 Dec 2023 15:59:49 +0000 Subject: [PATCH 4/5] Create README_aws_dl2q.24xlarge.md --- .../README_aws_dl2q.24xlarge.md | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md diff --git 
a/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md b/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md new file mode 100644 index 0000000000..5bada43be2 --- /dev/null +++ b/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md @@ -0,0 +1,25 @@ +# MLPerf Inference Benchmarking on AWS dl2q.24xlarge instance using 8 QAIC Cloud AI 100 + +`dl2q.24xlarge` instance is available in `us-west-2d` and it has 96 vCPUs and 768 GB of memory. + +[Deep Learning Base Qualcomm AMI (Amazon Linux 2)](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#Images:visibility=public-images;imageId=ami-0287712deef96ecc6) image is the recommended OS image as it comes with the QAIC SDKs (both Apps and Platform) preinstalled. + + +## System setup +``` +yum install -y python3-devel git +python3 -m pip install cmind +cm pull repo mlcommons@ck +``` + +## ResNet50 + +Do a performance run for the Offline scenario + +``` +cm run script --tags=generate-run-cmds,inference,_performance-only --device=qaic --backend=glow \ +--scenario=Offline --implementation=kilt --model=resnet50 \ +--test_query_count=40000 --precision=fp32 --rerun +``` + +*WIP* From c7a18ffe909c34b3d62ed81400c75e2214a728ac Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 12 Dec 2023 16:04:21 +0000 Subject: [PATCH 5/5] Update README_aws_dl2q.24xlarge.md --- .../README_aws_dl2q.24xlarge.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md b/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md index 5bada43be2..a95640034b 100644 --- a/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md +++ b/cm-mlops/script/reproduce-mlperf-inference-kilt/README_aws_dl2q.24xlarge.md @@ -22,4 +22,7 @@ cm run script --tags=generate-run-cmds,inference,_performance-only --device=qaic --test_query_count=40000 --precision=fp32 --rerun ``` +* 
`--adr.mlperf-inference-implementation.device_ids=0` can be used to run the inference only on the first QAIC device +* `--precision=uint8` is the best option to be used but unfortunately, it is not working with the default platform SDK. When we use `--precision=fp32` the float32 inputs are converted on the fly by the QAIC driver to uint8 format. This overhead and 4x memory BW usage reduce the Offline scenario performance by nearly 50%. We got `~9000` QPS for a single device run + *WIP*