python_experiments/run_our_executables_reordered_graph.py

import socket
import time_out_util
from config import *
from exec_utils import *


def run_exp(env_tag=knl_tag, data_path_tag=exec_path_tag):
    """Run every non-CUDA executable on every (reordered) graph and log the results.

    The settings are read from the ``env_tag`` section of ``config.json`` in the
    current working directory.  For each reorder method, data set, executable,
    ``eps``, ``mu`` and thread count, the executable is launched through
    ``time_out_util.run_with_timeout`` and its outcome is appended to
    ``./exp_results/<folder>/<data set>/<reorder method>/<mu>/<threads>/<executable>.txt``.

    :param env_tag: key of the environment section inside ``config.json``; it
        also becomes part of the result folder name.
    :param data_path_tag: key (inside the environment section) holding the
        directory that contains the executables.  When it equals
        ``exec_path_non_hbw_tag`` the executables whose name contains ``'hbw'``
        are skipped and ``'-non-hbw'`` is appended to the result folder name.
    :return: None.
    """
    with open('config.json') as ifs:
        my_config_dict = json.load(ifs)[env_tag]

    our_exec_path = my_config_dict[data_path_tag]
    data_set_path = my_config_dict[data_set_path_tag]

    # Built as a real list because it is printed and then iterated once per data set.
    our_exec_name_lst = [exec_name for exec_name in my_config_dict[exec_lst_tag]
                         if 'cuda' not in exec_name]

    print('our exec folder %s' % (our_exec_path,))
    print('our exec name list %s' % (our_exec_name_lst,))

    # Tags are strings: compare by value, not by object identity.
    use_non_hbw = data_path_tag == exec_path_non_hbw_tag

    data_set_lst = my_config_dict[data_set_lst_tag]
    if env_tag == gpu_other_tag:
        # Only the second-to-last configured data set is used in this environment.
        data_set_lst = data_set_lst[-2:-1]

    exp_res_root_name = 'exp_results'
    folder_name = 'exp-01-22-study-reordering' + '-' + env_tag
    if use_non_hbw:
        folder_name += '-non-hbw'

    # parameters
    eps_lst = [0.2]
    mu_lst = [5]
    time_out = 600  # passed as timeout_sec to every run_with_timeout call below

    def is_time_out(name):
        """Tell whether executable ``name`` is skipped on 'snap_friendster' for this env."""
        if env_tag == gpu23_tag or env_tag == gpu_other_tag:
            return 'roaring' in name or 'bsr' in name
        if env_tag == knl_tag:
            return 'roaring' in name
        return False

    def check_result(graph_dir, eps, mu, statistics_file_path):
        """Compare the md5sum of ``scanxp-result-<eps>-<mu>.txt`` and ``result-<eps>-<mu>.txt``.

        Both files are looked up in ``graph_dir``; the latter is assumed to have
        been generated by ppSCAN.  The raw md5sum outputs are echoed into the
        statistics file.

        :return: ``True`` when both outputs are non-empty and start with the same
            token, otherwise the string ``'False\\n'`` followed by both outputs.
        """
        gt_res_file_name = '-'.join(map(str, ['result', eps, mu])) + '.txt'
        compared_paths = [os.sep.join([graph_dir, 'scanxp-' + gt_res_file_name]),
                          os.sep.join([graph_dir, gt_res_file_name])]
        md5_outputs = []
        last_info = ''
        for file_path in compared_paths:
            _, last_info, last_correct_info = time_out_util.run_with_timeout(
                ' '.join(['md5sum', file_path]), timeout_sec=time_out)
            md5_outputs.append(last_info + last_correct_info)

        print('%s %s' % (last_info, md5_outputs))
        os.system(' '.join(['echo', str(md5_outputs), '>>', statistics_file_path]))

        tokens = [output.split() for output in md5_outputs]
        if tokens[0] and tokens[1] and tokens[0][0] == tokens[1][0]:
            print('Correct')
            return True
        mismatch_report = '\n'.join(map(str, md5_outputs))
        print(mismatch_report)
        return 'False\n' + mismatch_report

    def run_one(reorder_method, data_set_name, our_algorithm, eps, mu, t_num):
        """Run one executable once and append its outcome to the statistics file.

        :return: ``True`` when the run hit the time limit, ``False`` otherwise
            (including the case where the graph directory does not exist).
        """
        # NOTE: eps is not part of the directory, so runs with different eps
        # values append to the same statistics file.
        statistics_dir = os.sep.join(
            map(str, ['.', exp_res_root_name, folder_name, data_set_name, reorder_method, mu, t_num]))
        # 'mkdir -p' tolerates the directory (or its parents) already existing.
        os.system('mkdir -p ' + statistics_dir)
        statistics_file_path = statistics_dir + os.sep + our_algorithm + '.txt'
        print(statistics_file_path)

        # if the graph not exist
        graph_dir = os.sep.join([data_set_path, data_set_name, reorder_method])
        if not os.path.exists(graph_dir):
            print('Not exist' + graph_dir)
            os.system('echo graph not exist >> ' + statistics_file_path)
            return False

        # Remove the result file of a previous run so that the md5 check below
        # cannot pick up stale output.
        rm_cmd = 'rm ' + os.sep.join([graph_dir,
                                      'scanxp-result-' + str(eps) + '-' + str(mu) + '.txt'])
        print(rm_cmd)
        os.system(rm_cmd)

        # 1st: write header
        os.system(
            ' '.join(['echo', my_splitter + time.ctime() + my_splitter, '>>', statistics_file_path]))

        # 2nd: run exec cmd
        algorithm_path = our_exec_path + os.sep + our_algorithm
        cmd = ' '.join(map(str, [algorithm_path, graph_dir, eps, mu, t_num,
                                 statistics_file_path, '> /dev/null 2>&1']))
        print('cmd:  ' + cmd)
        tle_flag, info, correct_info = time_out_util.run_with_timeout(
            cmd, timeout_sec=time_out, env=os.environ.copy())
        if tle_flag:
            return True

        # 3rd: append outputs
        write_split(statistics_file_path)
        with open(statistics_file_path, 'a+') as ofs:
            ofs.write(correct_info)
            ofs.write('\nis_time_out:' + str(tle_flag))
            # check_result is deliberately evaluated here, while the file is
            # open, exactly where the md5 check has always been triggered.
            ofs.write('\nis_correct:' + str(check_result(graph_dir, eps, mu, statistics_file_path)) + '\n')
            ofs.write(my_splitter + time.ctime() + my_splitter)
            ofs.write('\n\n\n\n')
        print('finish: ' + cmd)
        return False

    def one_round(reorder_method='.'):
        """Run all data sets / executables / parameters for one reorder method."""
        for data_set_name in data_set_lst:
            for our_algorithm in our_exec_name_lst:
                if data_set_name == 'snap_friendster' and is_time_out(our_algorithm):
                    continue
                if use_non_hbw and 'hbw' in our_algorithm:
                    continue
                for eps in eps_lst:
                    for mu in mu_lst:
                        for t_num in my_config_dict[thread_num_lst_tag]:
                            # After a time-out the remaining thread counts of
                            # this (eps, mu) pair are skipped.
                            if run_one(reorder_method, data_set_name, our_algorithm, eps, mu, t_num):
                                break

    # Hosts that handle a single reorder method; every other host (gpu23
    # included) runs the full list.
    all_reorder_methods = ['.', 'gro', 'cache', 'hybrid', 'slashburn', 'bfsr', 'dfs']
    host_prefix_to_methods = [
        ('gpu13', ['gro']),
        ('gpu17', ['cache']),
        ('gpu18', ['rcm-cache']),
        ('gpu19', ['slashburn']),
        ('gpu12', ['bfsr']),
        ('gpu21', ['dfs']),
        ('gpu22', ['.']),
    ]

    # Looked up here rather than read from a module-level global, so that the
    # function also works when the module is imported instead of run as a script.
    host_name = socket.gethostname()
    reorder_method_lst = all_reorder_methods
    for host_prefix, methods in host_prefix_to_methods:
        if host_name.startswith(host_prefix):
            reorder_method_lst = methods
            break

    for reorder_method in reorder_method_lst:
        one_round(reorder_method)


if __name__ == '__main__':
    # Script entry point: choose the run_exp arguments from the host name.
    # `hostname` stays a module-level name because run_exp reads it.
    hostname = socket.gethostname()

    if hostname.startswith('lccpu12'):
        exp_kwargs = dict(env_tag=lccpu12_tag)
    elif hostname.startswith('gpu23'):
        exp_kwargs = dict(env_tag=gpu23_tag)
    elif hostname.startswith('gpu'):
        # Any other host whose name starts with 'gpu'.
        exp_kwargs = dict(env_tag=gpu_other_tag)
    else:
        # All remaining hosts use the knl section with the non-hbw executable
        # path (alternative: data_path_tag=exec_path_tag).
        exp_kwargs = dict(env_tag=knl_tag, data_path_tag=exec_path_non_hbw_tag)

    run_exp(**exp_kwargs)