Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge from ctuning #1047

Merged
merged 20 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions cm-mlops/automation/cache/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,32 @@ def search(self, i):

# Find CM artifact(s)
return self.cmind.access(i)


############################################################
def copy_to_remote(self, i):
    """
    Copy CM cache entries to a remote host.

    Thin wrapper: the input dict is forwarded unchanged to the
    'copy_to_remote' function of the sibling 'module_misc' module
    through utils.call_internal_module.

    Args:
      (CM input dict): passed through as is. The keys read by
                       module_misc.copy_to_remote are:

      remote_host (str): host name or IP of the remote machine

      (remote_port) (str): ssh port ('22' by default)

      (remote_user) (str): remote user name

      (remote_cm_repos_location) (str): CM repos directory on the remote machine

      tags (str): tags selecting the cache entries to copy

    Returns:
      (CM return dict):

      * return (int): return code == 0 if no error and >0 if error
      * (error) (str): error string if return>0

    """

    # NOTE(review): module_misc.copy_to_remote reads i['self_module'];
    # presumably call_internal_module injects it - confirm in cmind.utils.
    result = utils.call_internal_module(self, __file__, 'module_misc', 'copy_to_remote', i)
    return result
98 changes: 98 additions & 0 deletions cm-mlops/automation/cache/module_misc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import os
from cmind import utils


############################################################
def copy_to_remote(i):
    """
    Copy CM cache entries matching the given tags to a remote host
    using rsync over ssh.

    For every matching cache entry the whole entry directory is copied
    (except cm-cached-state.json), then a rewritten cm-cached-state.json,
    in which occurrences of the local cache path inside 'new_env' string
    values are replaced with the remote cache path, is copied separately.

    Args:
      (CM input dict):

      self_module (obj): automation module object; its 'cmind.access'
                         is used to look up the cache entries

      remote_host (str): host name or IP of the remote machine

      (remote_port) (str or int): ssh port (22 by default)

      (remote_user) (str): remote user name (local login name by default)

      (remote_cm_repos_location) (str): CM repos directory on the remote
                                        machine (/home/<remote_user>/CM/repos by default)

      (tags) (str): comma separated tags selecting the cache entries;
                    ",-tmp" is always appended

    Returns:
      (CM return dict):

      * return (int): return code == 0 if no error and >0 if error
      * (error) (str): error string if return>0

    """

    import json
    import posixpath
    import tempfile

    self_module = i['self_module']

    remote_host = i.get('remote_host')
    if not remote_host:
        return {'return':1, 'error': 'Please input remote host_name/IP via --remote_host'}

    # The port ends up inside the ssh command line given to rsync via -e,
    # so accept nothing but a plain port number.
    try:
        remote_port = int(i.get('remote_port', 22))
    except (TypeError, ValueError):
        return {'return': 1, 'error': f"Invalid remote port: {i.get('remote_port')}"}
    if not 0 < remote_port < 65536:
        return {'return': 1, 'error': f"Invalid remote port: {remote_port}"}

    # Resolve the local user lazily: it is only needed as a fallback.
    remote_user = i.get('remote_user') or _local_user_name()

    # Remote paths are always POSIX paths regardless of the local OS.
    # The default lives in the home of the user we log in as.
    remote_cm_repos_location = i.get('remote_cm_repos_location') or \
        posixpath.join("/home", remote_user, "CM", "repos")
    remote_cm_cache_location = posixpath.join(remote_cm_repos_location, "local", "cache")
    remote_prefix = f"{remote_user}@{remote_host}:"
    ssh_cmd = f"ssh -p {remote_port}"

    # Never copy temporary (unfinished) cache entries.
    tag_string = i.get('tags') or ''
    tag_string = f"{tag_string},-tmp" if tag_string else "-tmp"

    cm_input = {'action': 'show',
                'automation': 'cache',
                'tags': tag_string,
                'quiet': True}
    r = self_module.cmind.access(cm_input)
    if r['return'] > 0:
        return r

    entries = sorted(r['list'], key=lambda x: x.meta.get('alias', ''))

    if not entries:
        # Reporting success here would hide the fact that nothing was copied.
        return {'return': 1, 'error': f'No cache entries found for tags "{tag_string}"'}

    if len(entries) > 1:
        print("Multiple cache entries found: ")
        for k in entries:
            print(k.path)
        x = input("Would you like to copy them all? Y/n: ")
        if x.strip().lower() == 'n':
            return {'return': 0}

    # Local cache root; paths below it stored in the cached state are
    # rewritten to point to the remote cache.
    cm_repos_path = os.environ.get('CM_REPOS', os.path.join(os.path.expanduser("~"), "CM", "repos"))
    cm_cache_path = os.path.realpath(os.path.join(cm_repos_path, "local", "cache"))

    for k in entries:
        path = k.path
        cacheid = os.path.basename(path)

        # Validate the entry before touching the remote side so that a
        # broken entry does not leave a half-copied directory behind.
        cm_cached_state_json_file = os.path.join(path, "cm-cached-state.json")
        if not os.path.exists(cm_cached_state_json_file):
            return {'return':1, 'error': f'cm-cached-state.json file missing in {path}'}

        with open(cm_cached_state_json_file, "r") as f:
            cm_cached_state = json.load(f)

        r = _run_rsync(["rsync", "-avz", "--exclude", "cm-cached-state.json",
                        "-e", ssh_cmd, path,
                        remote_prefix + remote_cm_cache_location])
        if r['return'] > 0:
            return r

        # TODO: paths inside 'new_state' are not rewritten yet.
        new_env = cm_cached_state.get('new_env', {})
        for key, val in new_env.items():
            if isinstance(val, str) and cm_cache_path in val:
                new_env[key] = val.replace(cm_cache_path, remote_cm_cache_location)

        remote_cached_state_file_location = posixpath.join(
            remote_cm_cache_location, cacheid, "cm-cached-state.json")

        # Stage the rewritten state in a private temporary directory so that
        # nothing is left in (or clobbered inside) the current directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_state_file = os.path.join(tmp_dir, "cm-cached-state.json")
            with open(tmp_state_file, "w") as f:
                json.dump(cm_cached_state, f, indent=2)

            r = _run_rsync(["rsync", "-avz", "-e", ssh_cmd, tmp_state_file,
                            remote_prefix + remote_cached_state_file_location])
        if r['return'] > 0:
            return r

    return {'return':0}


def _local_user_name():
    """Return the local login name, also when there is no controlling terminal."""

    import getpass

    try:
        return os.getlogin()
    except OSError:
        # os.getlogin() fails e.g. under cron or inside containers.
        return getpass.getuser()


def _run_rsync(cmd):
    """Print and run one rsync command (argument list, no shell); return a CM return dict."""

    import shlex
    import subprocess

    print(' '.join(shlex.quote(arg) for arg in cmd))

    try:
        completed = subprocess.run(cmd)
    except OSError as e:
        return {'return': 1, 'error': f'Failed to run rsync: {e}'}

    if completed.returncode != 0:
        return {'return': 1, 'error': f'rsync failed with exit code {completed.returncode}'}

    return {'return': 0}
12 changes: 11 additions & 1 deletion cm-mlops/automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -1694,6 +1694,15 @@ def run(self, i):

def _update_variation_tags_from_variations(self, variation_tags, variations, variation_groups, excluded_variation_tags):

import copy
tmp_variation_tags_static = copy.deepcopy(variation_tags)
for v_i in range(len(tmp_variation_tags_static)):
v = tmp_variation_tags_static[v_i]

if v not in variations:
v_static = self._get_name_for_dynamic_variation_tag(v)
tmp_variation_tags_static[v_i] = v_static

# Recursively add any base variations specified
if len(variation_tags) > 0:
tmp_variations = {k: False for k in variation_tags}
Expand Down Expand Up @@ -1748,7 +1757,7 @@ def _update_variation_tags_from_variations(self, variation_tags, variations, var

unique_allowed_variations = variation_groups[default_base_variation]['variations']
# add the default only if none of the variations from the current group is selected and it is not being excluded with - prefix
if len(set(unique_allowed_variations) & set(variation_tags)) == 0 and default_base_variations[default_base_variation] not in excluded_variation_tags and default_base_variations[default_base_variation] not in variation_tags:
if len(set(unique_allowed_variations) & set(tmp_variation_tags_static)) == 0 and default_base_variations[default_base_variation] not in excluded_variation_tags and default_base_variations[default_base_variation] not in tmp_variation_tags_static:
tag_to_append = default_base_variations[default_base_variation]

if tag_to_append:
Expand Down Expand Up @@ -3413,6 +3422,7 @@ def docker(self, i):

return utils.call_internal_module(self, __file__, 'module_misc', 'docker', i)


##############################################################################
def _available_variations(self, i):
"""
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/app-mlperf-inference/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def postprocess(i):
accuracy_filename = "accuracy-openimages.py"
accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", \
accuracy_filename)
dataset_args = " --openimages-dir " + env['CM_DATASET_PATH']
dataset_args = " --openimages-dir " + os.getcwd() #just to make the script happy
accuracy_log_file_option_name = " --mlperf-accuracy-file "
datatype_option = ""

Expand Down
64 changes: 43 additions & 21 deletions cm-mlops/script/calibrate-model-for.qaic/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
"variations": {
"bs.1": {
"group": "batch-size",
"default": true,
"env": {
"CM_QAIC_MODEL_BATCH_SIZE": "1",
"CM_CREATE_INPUT_BATCH": "yes"
Expand Down Expand Up @@ -117,6 +118,7 @@
"model-framework": "tf"
},
"env": {
"CM_QAIC_MODEL_NAME": "resnet50",
"CM_CALIBRATE_IMAGENET": "yes",
"CM_QAIC_COMPILER_ARGS": "",
"CM_QAIC_COMPILER_PARAMS": "-output-node-name=ArgMax -profiling-threads=<<<CM_HOST_CPU_PHYSICAL_CORES_PER_SOCKET>>>",
Expand Down Expand Up @@ -152,49 +154,69 @@
"tags": "retinanet,_no-nms,_onnx"
}
},
"new_env_keys": [
"CM_QAIC_MODEL_RETINANET_*"
],
"env": {
"CM_QAIC_MODEL_NAME": "retinanet",
"CM_CALIBRATE_OPENIMAGES": "yes",
"CM_QAIC_COMPILER_ARGS": "",
"CM_QAIC_COMPILER_PARAMS": "-enable-channelwise -profiling-threads=<<<CM_HOST_CPU_PHYSICAL_CORES_PER_SOCKET>>> -onnx-define-symbol=batch_size,<<<CM_QAIC_MODEL_BATCH_SIZE>>> -node-precision-info=<<<CM_ML_MODEL_RETINANET_QAIC_NODE_PRECISION_INFO_FILE_PATH>>>",
"CM_QAIC_MODEL_TO_CONVERT": "calibrate_retinanet_no_nms_mlperf"
}
},
"bert-99": {
"group": "model",
"bert_": {
"default-variations": {
"seq-length": "seq.384"
},
"env": {
"CM_QAIC_MODEL_NAME": "bert-large"
},
"adr": {
"model-src": {
"tags": "bert-large,_onnx,_packed"
}
},
}
},
"bert-99": {
"group": "model",
"base": [
"bert_"
],
"env": {
"CM_CALIBRATE_SQUAD": "yes",
"CM_QAIC_COMPILER_ARGS": "",
"CM_QAIC_COMPILER_PARAMS": "-onnx-define-symbol=batch_size,1 -onnx-define-symbol=seg_length,<<<CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH>>> -input-list-file=<<<CM_DATASET_SQUAD_TOKENIZED_PACKED_FILENAMES_FILE>>> -num-histogram-bins=512 -profiling-threads=<<<CM_HOST_CPU_PHYSICAL_CORES_PER_SOCKET>>>",
"CM_QAIC_MODEL_TO_CONVERT": "calibrate_bert_mlperf"
}
},
"seq.#": {
"group": "seq-length",
"env": {
"CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH": "#"
},
"seq.#": {
"group": "seq-length",
"env": {
"CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH": "#"
},
"ad": {
"squad-preprocessed": {
"tags": "_seq.#"
}
"ad": {
"squad-preprocessed": {
"tags": "_seq.#"
}
}
},
"seq.384": {
"group": "seq-length",
"env": {
"CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH": "#"
},
"seq.384": {
"group": "seq-length",
"env": {
"CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH": "#"
},
"ad": {
"squad-preprocessed": {
"tags": "_seq.384"
}
"ad": {
"squad-preprocessed": {
"tags": "_seq.384"
}
}
},
"filter-size.#": {
"group": "calib-dataset-filter-size",
"ad": {
"preprocessed-dataset": {
"tags": "_filter-size.#,_filter,_size.#"
}
}
}
Expand Down
Loading